import gradio as gr
import librosa  # used below for audio loading; missing from the original imports
import plotly.express as px
import requests

# NOTE: processor, modelw, tts, FnAnswer and calculate_route are assumed to be
# defined elsewhere in the project (e.g. a Whisper processor/model, a Coqui TTS
# instance, and the question-answering / routing helpers).

# AUDIO-TO-AUDIO INTERFACE


def transcript(
    general_context, link_to_audio, voice, emotion, place, time, delete_history, state
):
    """Run speech-to-text on the input audio, feed the transcription to
    FnAnswer, and synthesise FnAnswer's answer back to speech."""
    # load the audio file, resampling to the 16 kHz rate the Whisper model expects
    audio_array, sampling_rate = librosa.load(link_to_audio, sr=16000)

    # transcribe the audio with the Whisper processor/model; sampling_rate must
    # be passed as a keyword, otherwise it binds to the wrong positional argument
    input_features = processor(
        audio_array, sampling_rate=sampling_rate, return_tensors="pt"
    ).input_features
    predicted_ids = modelw.generate(input_features)
    # batch_decode returns a list of strings; keep the single decoded transcription
    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]

    # FnAnswer returns a tuple: [0] answer text, [2] updated state, [3] language code
    quest_processing = FnAnswer(
        general_context, transcription, place, time, delete_history, state
    )
    state = quest_processing[2]
    print("language: " + quest_processing[3])

    # synthesise the answer in the selected voice and emotion
    tts.tts_to_file(
        text=str(quest_processing[0]),
        file_path="output.wav",
        speaker_wav=f"Audio_Files/{voice}.wav",
        language=quest_processing[3],
        emotion=emotion,  # was hardcoded to "angry", which ignored the selected emotion
    )

    audio_path = "output.wav"
    return audio_path, state["context"], state
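
# A hypothetical example call (illustration only; the file name and values
# below are assumptions, not part of the original):
# transcript(
#     general_context="",
#     link_to_audio="Audio_Files/question.wav",
#     voice="Rick Sanches",
#     emotion="Cheerful",
#     place="Luxembourg Gare, Luxembourg",
#     time="08:00:00",
#     delete_history="No",
#     state={"context": ""},
# )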


# To use the microphone in Chrome over plain HTTP, open
# chrome://flags/#unsafely-treat-insecure-origin-as-secure, add http://10.186.115.21:7860/
# under "Insecure origins treated as secure", enable the flag, and relaunch Chrome.

# example question:
# what's the weather like outside?
# What's the closest restaurant from here?



shortcut_js = """
<script>
function shortcuts(e) {
    switch (e.target.tagName.toLowerCase()) {
        case "input":
        case "textarea":
            break;
        default:
            if (e.key.toLowerCase() == "r" && e.ctrlKey) {
                console.log("recording");
                document.getElementById("recorder").start_recording();
            }
            if (e.key.toLowerCase() == "s" && e.ctrlKey) {
                console.log("stopping");
                document.getElementById("recorder").stop_recording();
            }
    }
}
// "keydown" (not the deprecated "keypress") is needed to catch Ctrl+key combinations
document.addEventListener('keydown', shortcuts, false);
</script>
"""

# with gr.Blocks(head=shortcut_js) as demo:
#     action_button = gr.Button(value="Name", elem_id="recorder")
#     textbox = gr.Textbox()
#     action_button.click(lambda : "button pressed", None, textbox)

# demo.launch()


# Generate options for hours (00-23)
hour_options = [f"{i:02d}:00:00" for i in range(24)]

model_answer = ""
general_context = ""
# Define the initial state with some initial context.
initial_state = {"context": general_context}
initial_context = initial_state["context"]
# Create the Gradio interface.


with gr.Blocks(theme=gr.themes.Default()) as demo:

    with gr.Row():
        with gr.Column(scale=1, min_width=300):
            time_picker = gr.Dropdown(
                choices=hour_options, label="What time is it?", value="08:00:00"
            )
            history = gr.Radio(
                ["Yes", "No"], label="Maintain the conversation history?", value="No"
            )
            voice_character = gr.Radio(
                choices=[
                    "Rick Sanches",
                    "Eddie Murphy",
                    "David Attenborough",
                    "Morgan Freeman",
                ],
                label="Choose a voice",
                value="Rick Sancher",
                show_label=True,
            )
            emotion = gr.Radio(
                choices=["Cheerful", "Grumpy"],
                label="Choose an emotion",
                value="Cheerful",
                show_label=True,
            )
            # place = gr.Radio(
            #     choices=[
            #         "Luxembourg Gare, Luxembourg",
            #         "Kirchberg Campus, Kirchberg",
            #         "Belval Campus, Belval",
            #         "Eiffel Tower, Paris",
            #         "Thionville, France",
            #     ],
            #     label="Choose a location for your car",
            #     value="Kirchberg Campus, Kirchberg",
            #     show_label=True,
            # )
            origin = gr.Textbox(
                value="Luxembourg Gare, Luxembourg", label="Origin", interactive=True
            )
            destination = gr.Textbox(
                value="Kirchberg Campus, Kirchberg",
                label="Destination",
                interactive=True,
            )
            recorder = gr.Audio(
                type="filepath", label="input audio", elem_id="recorder"
            )
        with gr.Column(scale=2, min_width=600):
            map_plot = gr.Plot()
            # calculate_route is assumed to take the origin/destination values;
            # without inputs=, Gradio would call it with no arguments
            origin.submit(
                fn=calculate_route, inputs=[origin, destination], outputs=map_plot
            )
            destination.submit(
                fn=calculate_route, inputs=[origin, destination], outputs=map_plot
            )
            output_audio = gr.Audio(label="output audio")
            # map_if = gr.Interface(fn=plot_map, inputs=year_input, outputs=map_plot)
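
    # A minimal sketch (not in the original, which leaves the recorder unwired):
    # hook the recorder up to transcript(), mirroring the commented-out
    # gr.Interface below. `origin` stands in for the old `place` radio, and
    # hidden components carry the conversation context and state.
    context_box = gr.Textbox(value=initial_context, visible=False)
    conv_state = gr.State(initial_state)
    recorder.change(  # fires once a recording or upload lands in the component
        fn=transcript,
        inputs=[
            context_box,
            recorder,
            voice_character,
            emotion,
            origin,
            time_picker,
            history,
            conv_state,
        ],
        outputs=[output_audio, context_box, conv_state],
    )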

    # iface = gr.Interface(
    #     fn=transcript,
    #     inputs=[
    #         gr.Textbox(value=initial_context, visible=False),
    #         gr.Audio(type="filepath", label="input audio", elem_id="recorder"),
    #         voice_character,
    #         emotion,
    #         place,
    #         time_picker,
    #         history,
    #         gr.State(),  # This will keep track of the context state across interactions.
    #     ],
    #     outputs=[gr.Audio(label="output audio"), gr.Textbox(visible=False), gr.State()],
    #     head=shortcut_js,
    # )

# close all interfaces open to make the port available
gr.close_all()
# Launch the interface.

demo.queue().launch(
    debug=True, server_name="0.0.0.0", server_port=7860, ssl_verify=False
)

# iface.launch(debug=True, share=False, server_name="0.0.0.0", server_port=7860, ssl_verify=False)