File size: 5,834 Bytes
6b9d2e8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 |
import gradio as gr
import plotly.express as px
import requests
# INTERFACE WITH AUDIO TO AUDIO
def transcript(
general_context, link_to_audio, voice, emotion, place, time, delete_history, state
):
"""this function manages speech-to-text to input Fnanswer function and text-to-speech with the Fnanswer output"""
# load audio from a specific path
audio_path = link_to_audio
audio_array, sampling_rate = librosa.load(
link_to_audio, sr=16000
) # "sr=16000" ensures that the sampling rate is as required
# process the audio array
input_features = processor(
audio_array, sampling_rate, return_tensors="pt"
).input_features
predicted_ids = modelw.generate(input_features)
transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
quest_processing = FnAnswer(
general_context, transcription, place, time, delete_history, state
)
state = quest_processing[2]
print("langue " + quest_processing[3])
tts.tts_to_file(
text=str(quest_processing[0]),
file_path="output.wav",
speaker_wav=f"Audio_Files/{voice}.wav",
language=quest_processing[3],
emotion="angry",
)
audio_path = "output.wav"
return audio_path, state["context"], state
# to be able to use the microphone on chrome, you will have to go to chrome://flags/#unsafely-treat-insecure-origin-as-secure and enter http://10.186.115.21:7860/
# in "Insecure origins treated as secure", enable it and relaunch chrome
# example question:
# what's the weather like outside?
# What's the closest restaurant from here?
import gradio as gr
shortcut_js = """
<script>
function shortcuts(e) {
var event = document.all ? window.event : e;
switch (e.target.tagName.toLowerCase()) {
case "input":
case "textarea":
break;
default:
if (e.key.toLowerCase() == "r" && e.ctrlKey) {
console.log("recording")
document.getElementById("recorder").start_recording();
}
if (e.key.toLowerCase() == "s" && e.ctrlKey) {
console.log("stopping")
document.getElementById("recorder").stop_recording();
}
}
}
document.addEventListener('keypress', shortcuts, false);
</script>
"""
# with gr.Blocks(head=shortcut_js) as demo:
# action_button = gr.Button(value="Name", elem_id="recorder")
# textbox = gr.Textbox()
# action_button.click(lambda : "button pressed", None, textbox)
# demo.launch()
# Generate options for hours (00-23)
hour_options = [f"{i:02d}:00:00" for i in range(24)]
model_answer = ""
general_context = ""
# Define the initial state with some initial context.
print(general_context)
initial_state = {"context": general_context}
initial_context = initial_state["context"]
# Create the Gradio interface.
with gr.Blocks(theme=gr.themes.Default()) as demo:
with gr.Row():
with gr.Column(scale=1, min_width=300):
time_picker = gr.Dropdown(
choices=hour_options, label="What time is it?", value="08:00:00"
)
history = gr.Radio(
["Yes", "No"], label="Maintain the conversation history?", value="No"
)
voice_character = gr.Radio(
choices=[
"Rick Sanches",
"Eddie Murphy",
"David Attenborough",
"Morgan Freeman",
],
label="Choose a voice",
value="Rick Sancher",
show_label=True,
)
emotion = gr.Radio(
choices=["Cheerful", "Grumpy"],
label="Choose an emotion",
value="Cheerful",
show_label=True,
)
# place = gr.Radio(
# choices=[
# "Luxembourg Gare, Luxembourg",
# "Kirchberg Campus, Kirchberg",
# "Belval Campus, Belval",
# "Eiffel Tower, Paris",
# "Thionville, France",
# ],
# label="Choose a location for your car",
# value="Kirchberg Campus, Kirchberg",
# show_label=True,
# )
origin = gr.Textbox(value="Luxembourg Gare, Luxembourg", label="Origin", interactive=True)
destination = gr.Textbox(
value="Kirchberg Campus, Kirchberg", label="Destination", interactive=True)
recorder = gr.Audio(type="filepath", label="input audio", elem_id="recorder")
with gr.Column(scale=2, min_width=600):
map_plot = gr.Plot()
origin.submit(fn=calculate_route, outputs=map_plot)
destination.submit(fn=calculate_route, outputs=map_plot)
output_audio = gr.Audio(label="output audio")
# map_if = gr.Interface(fn=plot_map, inputs=year_input, outputs=map_plot)
# iface = gr.Interface(
# fn=transcript,
# inputs=[
# gr.Textbox(value=initial_context, visible=False),
# gr.Audio(type="filepath", label="input audio", elem_id="recorder"),
# voice_character,
# emotion,
# place,
# time_picker,
# history,
# gr.State(), # This will keep track of the context state across interactions.
# ],
# outputs=[gr.Audio(label="output audio"), gr.Textbox(visible=False), gr.State()],
# head=shortcut_js,
# )
# close all interfaces open to make the port available
gr.close_all()
# Launch the interface.
demo.queue().launch(
debug=True, server_name="0.0.0.0", server_port=7860, ssl_verify=False
)
# iface.launch(debug=True, share=False, server_name="0.0.0.0", server_port=7860, ssl_verify=False)
|