awacke1 committed on
Commit
4206987
·
1 Parent(s): 3aa15c4

Upload 4 files

Browse files
Files changed (4) hide show
  1. README.txt +13 -0
  2. app.py +168 -0
  3. packages.txt +2 -0
  4. requirements.txt +7 -0
README.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: 🗣️SpeakNowASR🧠Memory💾Gradio
3
+ emoji: 🗣️🧠💾
4
+ colorFrom: yellow
5
+ colorTo: red
6
+ sdk: gradio
7
+ sdk_version: 3.5
8
+ app_file: app.py
9
+ pinned: false
10
+ license: apache-2.0
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ import time
4
+ import librosa
5
+ import soundfile
6
+ import nemo.collections.asr as nemo_asr
7
+ import tempfile
8
+ import os
9
+ import uuid
10
+
11
+ from transformers import BlenderbotTokenizer, BlenderbotForConditionalGeneration
12
+ import torch
13
+
14
+ # PersistDataset -----
15
+ import os
16
+ import csv
17
+ import gradio as gr
18
+ from gradio import inputs, outputs
19
+ import huggingface_hub
20
+ from huggingface_hub import Repository, hf_hub_download, upload_file
21
+ from datetime import datetime
22
+
23
+ # ---------------------------------------------
24
+ # Dataset and token settings - change awacke1 to your own HF id, and add an HF_TOKEN secret to your Space so it has write permission
25
+ # This should allow you to save your results to your own Dataset hosted on HF. ---
26
+ #DATASET_REPO_URL = "https://huggingface.co/datasets/awacke1/Carddata.csv"
27
+ #DATASET_REPO_ID = "awacke1/Carddata.csv"
28
+ #DATA_FILENAME = "Carddata.csv"
29
+ #DATA_FILE = os.path.join("data", DATA_FILENAME)
30
+ #HF_TOKEN = os.environ.get("HF_TOKEN")
31
+ #SCRIPT = """
32
+
33
+ #<script>
34
+ #if (!window.hasBeenRun) {
35
+ # window.hasBeenRun = true;
36
+ # console.log("should only happen once");
37
+ # document.querySelector("button.submit").click();
38
+ #}
39
+ #</script>
40
+ #"""
41
+
42
+ #try:
43
+ # hf_hub_download(
44
+ # repo_id=DATASET_REPO_ID,
45
+ # filename=DATA_FILENAME,
46
+ # cache_dir=DATA_DIRNAME,
47
+ # force_filename=DATA_FILENAME
48
+ # )
49
+ #except:
50
+ # print("file not found")
51
+ #repo = Repository(
52
+ # local_dir="data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN
53
+ #)
54
+
55
+ #def store_message(name: str, message: str):
56
+ # if name and message:
57
+ # with open(DATA_FILE, "a") as csvfile:
58
+ # writer = csv.DictWriter(csvfile, fieldnames=["name", "message", "time"])
59
+ # writer.writerow(
60
+ # {"name": name.strip(), "message": message.strip(), "time": str(datetime.now())}
61
+ # )
62
+ # # uncomment line below to begin saving -
63
+ # commit_url = repo.push_to_hub()
64
+ # return ""
65
+
66
+ #iface = gr.Interface(
67
+ # store_message,
68
+ # [
69
+ # inputs.Textbox(placeholder="Your name"),
70
+ # inputs.Textbox(placeholder="Your message", lines=2),
71
+ # ],
72
+ # "html",
73
+ # css="""
74
+ # .message {background-color:cornflowerblue;color:white; padding:4px;margin:4px;border-radius:4px; }
75
+ # """,
76
+ # title="Reading/writing to a HuggingFace dataset repo from Spaces",
77
+ # description=f"This is a demo of how to do simple *shared data persistence* in a Gradio Space, backed by a dataset repo.",
78
+ # article=f"The dataset repo is [{DATASET_REPO_URL}]({DATASET_REPO_URL})",
79
+ #)
80
+
81
+
82
+ # main -------------------------
83
# Conversational model: the checkpoint name, its weights and the matching
# tokenizer are all loaded at import time.
# NOTE: the module-level name `model` is assigned again further down in this
# file (to the NeMo ASR model), so this binding only lasts until then.
mname = "facebook/blenderbot-400M-distill"
model = BlenderbotForConditionalGeneration.from_pretrained(mname)
tokenizer = BlenderbotTokenizer.from_pretrained(mname)
86
+
87
def take_last_tokens(inputs, note_history, history, max_tokens=128):
    """Trim an over-long tokenized conversation to its most recent tokens.

    When ``inputs['input_ids']`` has more than ``max_tokens`` columns, the
    token tensors are cut down to their last ``max_tokens`` columns, the two
    oldest separator-delimited notes are dropped from ``note_history`` and the
    oldest entry is dropped from ``history``. Otherwise everything is
    returned untouched.

    Args:
        inputs: Mapping holding 2-D ``'input_ids'`` and ``'attention_mask'``
            tensors (rows x tokens). Updated in place when trimming happens.
        note_history: Single-element list whose string is the conversation
            joined with the ``'</s> <s>'`` separator.
        history: List of conversation turns.
        max_tokens: Maximum number of trailing tokens to keep (default 128).

    Returns:
        The tuple ``(inputs, note_history, history)``.

    Raises:
        ValueError: If ``max_tokens`` is smaller than 1.
    """
    if max_tokens < 1:
        # A slice of ``[-0:]`` would silently keep everything, so reject it.
        raise ValueError("max_tokens must be a positive integer")

    if inputs['input_ids'].shape[1] > max_tokens:
        # Slice the tensors directly instead of rebuilding them from Python
        # lists: same values, but dtype and device are preserved. Only the
        # first row is kept, as this helper handles a single conversation.
        for key in ('input_ids', 'attention_mask'):
            inputs[key] = inputs[key][:1, -max_tokens:]
        separator = '</s> <s>'
        # Forget the oldest exchange (two notes) so the text history shrinks
        # together with the token window.
        note_history = [separator.join(note_history[0].split(separator)[2:])]
        history = history[1:]
    return inputs, note_history, history
95
+
96
def add_note_to_history(note, note_history):
    """Return the history with ``note`` appended, as a one-element list.

    The existing entries of ``note_history`` followed by ``note`` are joined
    with the ``'</s> <s>'`` separator into a single string.

    Args:
        note: Text to append to the conversation.
        note_history: List of strings making up the conversation so far
            (may be empty). It is left unmodified.

    Returns:
        A new single-element list containing the joined conversation string.
    """
    # Build a fresh sequence rather than appending to the caller's list, so
    # the argument is not changed as a hidden side effect.
    return ['</s> <s>'.join([*note_history, note])]
101
+
102
+
103
# Keep a dedicated handle on the conversational model. The module-level name
# `model` is rebound to the NeMo ASR model further down in this file, so a
# call-time lookup of `model` inside `chat` would pick up the wrong object.
_CHAT_MODEL = model


def chat(message, history):
    """Generate a chatbot reply to ``message`` given the prior conversation.

    Args:
        message: The user's new message.
        history: List of ``(user_text, bot_text)`` pairs from earlier turns,
            or a falsy value when the conversation is new.

    Returns:
        The updated history twice, as ``(history, history)``.
    """
    separator = '</s> <s>'
    history = history or []
    if history:
        # Flatten earlier turns into one separator-delimited string.
        history_useful = [
            separator.join(str(turn[0]) + separator + str(turn[1]) for turn in history)
        ]
    else:
        history_useful = []
    history_useful = add_note_to_history(message, history_useful)

    # Named `encoded` so the module-level `inputs` import is not shadowed.
    encoded = tokenizer(history_useful, return_tensors="pt")
    encoded, history_useful, history = take_last_tokens(encoded, history_useful, history)
    reply_ids = _CHAT_MODEL.generate(**encoded)
    response = tokenizer.batch_decode(reply_ids, skip_special_tokens=True)[0]

    history_useful = add_note_to_history(response, history_useful)
    list_history = history_useful[0].split(separator)
    # The last two notes are the message just sent and the reply to it.
    history.append((list_history[-2], list_history[-1]))
    # store_message(message, response) # Save to dataset - uncomment if you uncomment above to save inputs and outputs to your dataset
    return history, history
119
+
120
+
121
# Sample rate that audio is resampled to and written out at before
# transcription.
SAMPLE_RATE = 16000
# NOTE: this rebinds the module-level name `model`, replacing the Blenderbot
# instance assigned earlier in the file with the NeMo ASR model.
model = nemo_asr.models.EncDecRNNTBPEModel.from_pretrained("nvidia/stt_en_conformer_transducer_xlarge")
# NOTE(review): passing None presumably selects the default decoding
# configuration - confirm against the NeMo documentation.
model.change_decoding_strategy(None)
model.eval()
125
+
126
def process_audio_file(file):
    """Load an audio file as a mono waveform sampled at ``SAMPLE_RATE``.

    Args:
        file: Path (or other source accepted by ``librosa.load``) of the
            audio to read.

    Returns:
        The audio samples as returned by ``librosa.load``.
    """
    # Ask librosa for the target rate and a mono downmix in a single step.
    # With default arguments ``librosa.load`` first resamples to 22050 Hz, so
    # converting to SAMPLE_RATE afterwards resampled the signal twice.
    data, _ = librosa.load(file, sr=SAMPLE_RATE, mono=True)
    return data
133
+
134
+
135
def transcribe(audio, state=""):
    """Transcribe one audio clip and append the text to the running state.

    Args:
        audio: Path of the recorded audio file, or ``None`` when no audio
            was supplied for this call.
        state: Text accumulated from earlier calls; ``None`` is treated as
            an empty string.

    Returns:
        The accumulated transcript twice, as ``(state, state)``.
    """
    if state is None:
        state = ""
    if audio is None:
        # Nothing was recorded for this call: keep the transcript unchanged
        # instead of failing while trying to load a missing file.
        return state, state

    audio_data = process_audio_file(audio)
    with tempfile.TemporaryDirectory() as tmpdir:
        # Write the processed audio to a throw-away WAV file and hand its
        # path to the model; the directory is removed on exit.
        audio_path = os.path.join(tmpdir, f'audio_{uuid.uuid4()}.wav')
        soundfile.write(audio_path, audio_data, SAMPLE_RATE)
        transcriptions = model.transcribe([audio_path])

    # The model may return a 2-tuple; in that case its first element holds
    # the list of transcriptions.
    if isinstance(transcriptions, tuple) and len(transcriptions) == 2:
        transcriptions = transcriptions[0]
    text = transcriptions[0]
    # store_message(transcriptions, state) # Save to dataset - uncomment to store into a dataset - hint you will need your HF_TOKEN
    state = state + text + " "
    return state, state
149
+
150
# Gradio UI: a live interface that calls `transcribe` with the microphone
# recording (delivered as a file path) plus the session state, and shows the
# returned transcript in a textbox while storing it back into the state.
iface = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(source="microphone", type='filepath', streaming=True),
        "state",
    ],
    outputs=[
        "textbox",
        "state",
    ],
    layout="horizontal",
    theme="huggingface",
    title="🗣️LiveSpeechRecognition🧠Memory💾",
    description=f"Live Automatic Speech Recognition (ASR) with Memory💾 Dataset.",
    allow_flagging='never',
    live=True,
    # article=f"Result Output Saved to Memory💾 Dataset: [{DATASET_REPO_URL}]({DATASET_REPO_URL})"
)
# Start the app when this module is run.
iface.launch()
packages.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ ffmpeg
2
+ libsndfile1
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ nemo_toolkit[asr]
2
+ transformers
3
+ torch
4
+ gradio
5
+ Werkzeug
6
+ huggingface_hub
7
+ Pillow