chrisjay committed on
Commit
0724b59
·
1 Parent(s): d370dc9
Files changed (3) hide show
  1. app.py +42 -4
  2. data +1 -1
  3. requirements.txt +3 -1
app.py CHANGED
@@ -5,9 +5,12 @@ import os
5
  import csv
6
  import random
7
  import pandas as pd
 
8
  import gradio as gr
 
9
  from article import ARTICLE
10
  from utils import *
 
11
  import scipy.io.wavfile as wavf
12
  from huggingface_hub import Repository, upload_file
13
 
@@ -169,6 +172,40 @@ def save_record(language,text,record,number,age,gender,accent,number_history,cur
169
  next_number_image = f'number/best.gif'
170
  return output_string,next_number_image,number_history,next_number,done_recording,default_record
171
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
  def display_records():
173
  repo.git_pull()
174
  REPOSITORY_DATA_DIR = os.path.join(REPOSITORY_DIR,'data')
@@ -261,16 +298,17 @@ with block:
261
 
262
  save.click(save_record, inputs=[language,text,record,number,age,gender,accent,state,current_number,country,email,done_recording],outputs=[output_result,number,state,current_number,done_recording,record])
263
 
264
- with gr.TabItem('Listen') as listen_tab:
 
265
  gr.Markdown("Listen to the recordings contributed. You can find them <a href='https://huggingface.co/datasets/chrisjay/crowd-speech-africa' target='blank'>here</a>.")
266
  display_html = gr.HTML("""<div style="color: green">
267
- <p> ⌛ Please wait. Loading dataset... </p>
268
  </div>
269
  """)
270
-
271
 
272
  #listen = gr.Button("Listen")
273
- listen_tab.select(display_records,inputs=[],outputs=display_html)
274
  gr.Markdown(ARTICLE)
275
 
276
  block.launch()
 
5
  import csv
6
  import random
7
  import pandas as pd
8
+ import numpy as np
9
  import gradio as gr
10
+ from collections import Counter
11
  from article import ARTICLE
12
  from utils import *
13
+ import matplotlib.pyplot as plt
14
  import scipy.io.wavfile as wavf
15
  from huggingface_hub import Repository, upload_file
16
 
 
172
  next_number_image = f'number/best.gif'
173
  return output_string,next_number_image,number_history,next_number,done_recording,default_record
174
 
175
+
176
+ def show_records():
177
+ repo.git_pull()
178
+ REPOSITORY_DATA_DIR = os.path.join(REPOSITORY_DIR,'data')
179
+ repo_recordings = [os.path.join(REPOSITORY_DATA_DIR,f.name) for f in os.scandir(REPOSITORY_DATA_DIR)] if os.path.isdir(REPOSITORY_DATA_DIR) else []
180
+
181
+ audio_repo = [os.path.join(f,'audio.wav') for f in repo_recordings]
182
+ audio_repo = [a.replace('data/data/','https://huggingface.co/datasets/chrisjay/crowd-speech-africa/resolve/main/data/') for a in audio_repo]
183
+ metadata_all = [read_json_lines(os.path.join(f,'metadata.jsonl'))[0] for f in repo_recordings]
184
+ audios_all = audio_repo
185
+
186
+ langs=[m['language_name'] for m in metadata_all]
187
+ lang_dict = Counter(langs)
188
+ lang_dict.update({'All others':0})
189
+ all_langs = list(lang_dict.keys())
190
+ langs_count = [lang_dict[k] for k in all_langs]
191
+ y_pos = np.arange(len(all_langs))
192
+ plt.barh(all_langs, langs_count)
193
+ plt.ylabel("Language")
194
+ plt.xlabel('Number of audio samples')
195
+ plt.title('Distribution of audio samples over languages')
196
+
197
+ #audios = [a for a in audios_all]
198
+ #texts = [m['text'] for m in metadata_all]
199
+ #numbers = [m['number'] for m in metadata_all]
200
+
201
+ html = f"""<div class="infoPoint">
202
+ <h1> Hooray! We have collected {len(metadata_all)} samples!</h1>
203
+ """
204
+
205
+ return html,plt
206
+
207
+
208
+
209
  def display_records():
210
  repo.git_pull()
211
  REPOSITORY_DATA_DIR = os.path.join(REPOSITORY_DIR,'data')
 
298
 
299
  save.click(save_record, inputs=[language,text,record,number,age,gender,accent,state,current_number,country,email,done_recording],outputs=[output_result,number,state,current_number,done_recording,record])
300
 
301
+ with gr.TabItem('Dashboard') as listen_tab:
302
+
303
  gr.Markdown("Listen to the recordings contributed. You can find them <a href='https://huggingface.co/datasets/chrisjay/crowd-speech-africa' target='blank'>here</a>.")
304
  display_html = gr.HTML("""<div style="color: green">
305
+ <p> ⌛ Please wait. Loading dashboard... </p>
306
  </div>
307
  """)
308
+ plot = gr.Plot(type="matplotlib")
309
 
310
  #listen = gr.Button("Listen")
311
+ listen_tab.select(show_records,inputs=[],outputs=[display_html,plot])
312
  gr.Markdown(ARTICLE)
313
 
314
  block.launch()
data CHANGED
@@ -1 +1 @@
1
- Subproject commit c10367e2eb0d27b88a70eeba0258400ea0e22469
 
1
+ Subproject commit c252b9acd77ce54411e803ecf5c66bfeafb1b887
requirements.txt CHANGED
@@ -1,3 +1,5 @@
1
  pandas
2
  scipy
3
- pycountry
 
 
 
1
  pandas
2
  scipy
3
+ pycountry
4
+ numpy
5
+ matplotlib