whisperaudio_clone

Runtime error

File size: 4,008 Bytes

from transformers import pipeline
# Both speech-recognition pipelines are created at import time.
# asr_pipe: used by transcribe_the_command when the transcript is matched
# against the known commands for a menu.
asr_pipe = pipeline("automatic-speech-recognition", model="Abdullah17/whisper-small-urdu")
# transcript_pipe: used by transcribe_the_command for the menu ids listed in
# transcript_only, where the transcript text itself is what gets saved.
transcript_pipe = pipeline("automatic-speech-recognition", model="ihanif/whisper-medium-urdu")
from difflib import SequenceMatcher
import json
import socket
import soundfile as sf
import gradio as gr
def get_local_ip():
    """Return the local IPv4 address used for outbound traffic, or None.

    A UDP socket is "connected" to a public address only so that the OS
    selects the outgoing interface; connecting a datagram socket does not
    send any packet.

    Returns:
        The local IP address as a string, or None if any socket call fails
        (the error is printed).
    """
    try:
        # The context manager closes the socket even when connect() or
        # getsockname() raises, so no descriptor is leaked on failure.
        with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
            s.connect(("8.8.8.8", 80))
            return s.getsockname()[0]
    except OSError as e:
        print(f"Error getting local IP: {e}")
        return None



# Load the command table once at import time. transcribe_the_command indexes
# it by menu_id and passes the entry to find_most_similar_command, which
# iterates it as groups (lists) of command strings.
with open("tasks.json", "r",encoding="utf-8") as json_file:
    urdu_data = json.load(json_file)
# List of commands
# commands = [
#     "نمائندے ایجنٹ نمائندہ",
#     "  سم  ایکٹیویٹ ",
#     " سم  بلاک بند ",
#     "موبائل پیکیجز انٹرنیٹ پیکیج",
#     " چالان جمع چلان",
#     " گانا "
# ]
# replies = [
# 1,2,
# ]
# Function to find the most similar command
def find_most_similar_command(statement, command_list):
    """Find the command most similar to *statement* and its group index.

    Similarity is ``difflib.SequenceMatcher(None, statement, command).ratio()``.
    On ties the earliest command wins, because only a strictly higher score
    replaces the current best.

    Args:
        statement: Text to match against the commands.
        command_list: Iterable of groups, each an iterable of command strings.

    Returns:
        A ``(best_match, reply)`` tuple: the best-scoring command and the
        position of its group within *command_list*. Both are None when no
        command scores above zero (including an empty *command_list*).
    """
    best_match = None
    # Initialised up front: previously this name was unbound when nothing
    # scored above zero, and the return statement raised UnboundLocalError.
    reply = None
    highest_similarity = 0
    for i, sub_list in enumerate(command_list):
        for command in sub_list:
            similarity = SequenceMatcher(None, statement, command).ratio()
            print(i, "similarity", similarity)
            if similarity > highest_similarity:
                highest_similarity = similarity
                best_match = command
                reply = i

    return best_match, reply

# Menu ids for which transcribe_the_command saves the raw transcript
# (no command matching).
transcript_only=["1","3","4"]
# Menu ids for which transcribe_the_command matches the transcript against the
# menu's commands and saves the resulting index.
match_and_save=["2"]
# Maps a menu id to the query-parameter name that send_data_to_db puts in the
# save_order URL.
# NOTE(review): there is no entry for "2", although match_and_save routes that
# id to send_data_to_db, whose col_names[menu_id] lookup then raises KeyError.
# Confirm the intended column name and add it here.
col_names={'1':"name",'3':"address",'4':"order"}
def send_data_to_db(menu_id, col_value, order_id, timeout=10):
    """Send one order field to the save_order API with a GET request.

    The response status code and body are printed; nothing is returned.

    Args:
        menu_id: Key into ``col_names`` selecting the query-parameter name.
        col_value: Value to send for that parameter.
        order_id: Value sent as the ``order_id`` query parameter.
        timeout: Seconds to wait for the server before giving up, so a
            stalled endpoint cannot block the caller indefinitely.

    Raises:
        KeyError: If *menu_id* has no entry in ``col_names``.
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    import requests

    col_name = col_names[menu_id]
    # API endpoint URL
    url = 'https://pizzahut.softinfix.tech/api/save_order'
    # Let requests build the query string: values are percent-encoded, so
    # spaces, '&', '#' or '=' in a transcript cannot corrupt the query, and
    # non-string values (e.g. an integer index) are accepted.
    params = {col_name: col_value, "order_id": order_id}

    response = requests.get(url, params=params, timeout=timeout)

    # Print response
    print(response.status_code)
    print(response.text)

def transcribe_the_command(audio,menu_id,order_id):
    """Transcribe a recording and act on it according to *menu_id*.

    The audio is written to ``recorded_audio.wav`` and transcribed. Then:

    * If *menu_id* is in ``transcript_only``, the transcript is sent to
      ``send_data_to_db`` and returned.
    * Otherwise the transcript is matched against ``urdu_data[menu_id]`` with
      ``find_most_similar_command`` and the matched group index is returned.
      If *menu_id* is in ``match_and_save``, that index is also sent to
      ``send_data_to_db``.

    NOTE(review): ``col_names`` has no entry for the ids in
    ``match_and_save``, so the save in that branch raises KeyError inside
    ``send_data_to_db`` until a column name is configured.

    Args:
        audio: ``(sample_rate, audio_data)`` pair, or None when nothing was
            recorded.
        menu_id: Menu identifier selecting the behaviour described above.
        order_id: Order identifier forwarded to ``send_data_to_db``.

    Returns:
        The transcript text for ``transcript_only`` menus; otherwise the
        group index reported by ``find_most_similar_command``.

    Raises:
        ValueError: If *audio* is None.
        KeyError: If a matching menu has no entry in ``urdu_data``.
    """
    if audio is None:
        # Fail with a clear message instead of a TypeError on tuple unpacking.
        raise ValueError("No audio was received; record a clip before submitting.")

    local_ip = get_local_ip()
    if local_ip:
        print(f"Local IP Address: {local_ip}")
    else:
        print("Local IP could not be determined.")

    sample_rate, audio_data = audio
    file_name = "recorded_audio.wav"
    # The pipelines are fed from this file; the audio is written unmodified.
    sf.write(file_name, audio_data, sample_rate)
    print(menu_id)

    if menu_id in transcript_only:
        transcript = transcript_pipe(file_name)["text"]
        send_data_to_db(menu_id, transcript, order_id)
        print("data uploaded successfully!")
        # Return the transcript so the UI shows what was stored.
        return transcript

    save_result = menu_id in match_and_save
    transcript = asr_pipe(file_name)["text"]
    commands = urdu_data[menu_id]
    if not save_result:
        print(commands)
    most_similar_command, reply = find_most_similar_command(transcript, commands)
    print(f"Given Statement: {transcript}")
    print(f"Most Similar Command: {most_similar_command}\n")
    print(reply)
    if save_result and reply is not None:
        # The index is an int; send it as text so the saver can build its URL.
        send_data_to_db(menu_id, str(reply), order_id)
    return reply
# get_text_from_voice("urdu.wav")


# Web UI: one microphone recording plus two text fields in, one text value out.
# NOTE(review): the gr.inputs namespace is a legacy Gradio API — confirm it is
# available in the Gradio version this app is deployed with.
iface = gr.Interface(
    fn=transcribe_the_command,
    inputs=[
        gr.inputs.Audio(label="Recorded Audio", source="microphone"),
        gr.inputs.Textbox(label="menu_id"),
        gr.inputs.Textbox(label="order_id"),
    ],
    outputs="text",
    title="Whisper Small Urdu Command",
    description="Realtime demo for Urdu speech recognition using a fine-tuned Whisper small model and outputting the estimated command on the basis of speech transcript.",
)

# Start serving the interface.
iface.launch()