Spaces: Runtime error

Commit 1c827e2 · Hrsh-Venket committed
Parent(s): aaa69b0

new file: app.py
new file: hrsh-test.mp3
new file: karanrecording.mp3
new file: requirements.txt
new file: shaunakrecording.mp3
new file: test_1.mp3
new file: testaudio.mp3
- app.py +128 -0
- hrsh-test.mp3 +0 -0
- karanrecording.mp3 +0 -0
- requirements.txt +144 -0
- shaunakrecording.mp3 +0 -0
- test_1.mp3 +0 -0
- testaudio.mp3 +0 -0
app.py
ADDED
@@ -0,0 +1,128 @@
import gradio as gr
from huggingsound import SpeechRecognitionModel
from transformers import logging
from transformers import pipeline
from transformers import BertTokenizer, BertModel
from pydub import AudioSegment
import os

unmasker = pipeline('fill-mask', model='bert-base-uncased')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained("bert-base-uncased")


# Dynamic-programming (Wagner-Fischer) edit distance between strings s and t.
def levenshtein_distance(s, t):
    m, n = len(s), len(t)
    d = [[0] * (n + 1) for _ in range(m + 1)]

    for i in range(m + 1):
        d[i][0] = i

    for j in range(n + 1):
        d[0][j] = j

    for j in range(1, n + 1):
        for i in range(1, m + 1):
            if s[i - 1] == t[j - 1]:
                d[i][j] = d[i - 1][j - 1]
            else:
                d[i][j] = 1 + min(d[i - 1][j], d[i][j - 1], d[i - 1][j - 1])

    return d[m][n]


# Rejoin a list of words and punctuation marks into readable text:
# capitalise after sentence-starting marks and skip the space after
# dashes and apostrophes.
def collate(input):
    pun_marks = [",", ".", "?", "!", ";", ":", "-", "–", "(", ")", "[", "]", "{", "}", "'", "\"", "`"]
    output = ""
    Capital = True
    Dash = False
    for i in range(len(input)):
        if input[i] in pun_marks:
            output += input[i]
            if input[i] in [".", "("]:
                Capital = True
            if input[i] in ["-", "'"]:
                Dash = True
            else:
                Dash = False
        else:
            piece = ""
            if not Dash:
                piece += " "
            if Capital:
                piece += input[i].capitalize()
                Capital = False
            else:
                piece += input[i]
            output += piece
    return output


# Transcribe the given audio files with wav2vec2; the BERT-based correction
# passes further down are currently commented out.
def everything(audio_paths):
    w2vmodel = SpeechRecognitionModel("jonatasgrosman/wav2vec2-large-xlsr-53-english")
    logging.set_verbosity_error()  # change 'error' to 'warning' or remove this line if you want to see the warnings
    # https://huggingface.co/jonatasgrosman/wav2vec2-large-xlsr-53-english
    # https://huggingface.co/bert-base-uncased

    transcriptions = w2vmodel.transcribe(audio_paths)

    return transcriptions

    # input = transcriptions[0]["transcription"]
    # input = input.split()

    # #(1) is a strategy where token ids are used to determine lexicographic distance
    # #(2) is a strategy where replaced words are collected in a separate output list
    # for t in range(1):
    #     # output = []  #(2)
    #     for i in range(len(input)):
    #         temp = input[i]
    #         token = tokenizer(temp)['input_ids'][1]
    #         input[i] = "[MASK]"
    #         apiint = unmasker(' '.join(input))
    #         dist = []
    #         for r in range(5):
    #             # if (np.abs((apiint[r]['token'] - token)) < 2):  #(1)
    #             dist.append(levenshtein_distance(temp, apiint[r]['token_str']))
    #         lindex = 0
    #         l = dist[0]
    #         for r in range(5):
    #             if dist[r] < l:
    #                 lindex = r
    #                 l = dist[r]
    #         if l <= 2:
    #             input[i] = apiint[lindex]['token_str']
    #             # output.append(apiint[lindex]['token_str'])  #(2)
    #         else:
    #             input[i] = temp
    #             # output.append(temp)  #(2)
    #             # input[i] = temp  #(2)

    # for t in range(1):
    #     inndex = 1
    #     for i in range(len(input)):
    #         input.insert(inndex, "[MASK]")
    #         # print(' '.join(input))
    #         apiint = unmasker(' '.join(input))
    #         if (apiint[0]['token'] < 1500):
    #             input[inndex] = apiint[0]["token_str"]
    #             inndex += 2
    #         else:
    #             del input[inndex]
    #             inndex += 1

    # st.write(collate(input))

    # # In comparison, a plain autocorrect gives this output:

    # # "The b-movie by Jerry Sinclair, the sound of buzzing
    # # bees, can be heard according to all known laws of
    # # aviation that is no way for b to be able to fly its
    # # wings are too small to get its start little body off
    # # the ground, the be, of course, flies anyway because ``
    # # bees don't care what humans think is possible.
    # # Barbuda is guaranteed one member of the House of
    # # Representatives and two members of the Senate."

    # # - https://huggingface.co/oliverguhr/spelling-correction-english-base?text=lets+do+a+comparsion


demo = gr.Interface(fn=everything,
                    inputs=[gr.UploadButton()],
                    outputs=["text"])

demo.launch()
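The commented-out block above sketches a BERT-based cleanup of the transcript: each word is masked in turn, the fill-mask pipeline proposes candidates, and a candidate replaces the word only if it is within a small edit distance of the original. Below is a compact, self-contained version of that idea, for illustration only: it reuses levenshtein_distance from app.py, the name correct_words is illustrative rather than part of the commit, and the threshold of 2 mirrors the commented code.

# Sketch only: mask each word, query BERT's fill-mask pipeline, and accept
# the closest candidate when it is within a small edit distance.
from transformers import pipeline

unmasker = pipeline('fill-mask', model='bert-base-uncased')

def correct_words(words, max_dist=2):
    corrected = list(words)
    for i, word in enumerate(corrected):
        masked = corrected.copy()
        masked[i] = "[MASK]"
        candidates = unmasker(' '.join(masked))  # top-5 fills by default
        best = min(candidates,
                   key=lambda c: levenshtein_distance(word, c['token_str']))
        if levenshtein_distance(word, best['token_str']) <= max_dist:
            corrected[i] = best['token_str']
    return corrected

# The corrected word list could then be rejoined into a sentence with
# collate() from app.py.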
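For reference, here is a hedged sketch of one way the upload could be wired to the transcriber: gr.Audio(type="filepath") hands the callback a local file path, which can be wrapped in the one-element list that SpeechRecognitionModel.transcribe() expects. The names transcribe_one and demo_sketch are illustrative, not part of the commit, and the sketch assumes the Gradio 3.x API. Note that app.py imports gradio, which is not pinned in requirements.txt below.

# Sketch only: wire a single audio upload to the wav2vec2 transcriber above.
import gradio as gr

def transcribe_one(audio_path):
    # everything() expects a list of paths and returns a list of result dicts.
    return everything([audio_path])[0]["transcription"]

demo_sketch = gr.Interface(fn=transcribe_one,
                           inputs=gr.Audio(type="filepath"),
                           outputs="text")

if __name__ == "__main__":
    demo_sketch.launch()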
hrsh-test.mp3
ADDED
Binary file (109 kB).
karanrecording.mp3
ADDED
Binary file (70 kB).
requirements.txt
ADDED
@@ -0,0 +1,144 @@
# aiohttp==3.8.4
# aiosignal==1.3.1
# altair==4.2.2
# appdirs==1.4.4
# asttokens @ file:///home/conda/feedstock_root/build_artifacts/asttokens_1670263926556/work
# async-timeout==4.0.2
# attrs==23.1.0
# audioread==3.0.0
# backcall @ file:///home/conda/feedstock_root/build_artifacts/backcall_1592338393461/work
# backports.functools-lru-cache @ file:///home/conda/feedstock_root/build_artifacts/backports.functools_lru_cache_1618230623929/work
# blinker==1.6.2
# # blur-detector==0.0.6
# brotlipy==0.7.0
# cachetools==5.3.0
# certifi==2022.12.7
# cffi @ file:///C:/ci/cffi_1625831756778/work
# chardet @ file:///C:/ci/chardet_1607706937985/work
# charset-normalizer==3.1.0
# click==8.1.3
# colorama @ file:///home/conda/feedstock_root/build_artifacts/colorama_1666700638685/work
# conda==4.14.0
# conda-package-handling @ file:///C:/ci/conda-package-handling_1618262410900/work
# contourpy==1.0.7
# cryptography @ file:///C:/ci/cryptography_1616769504165/work
# cycler==0.11.0
# datasets==2.11.0
# debugpy @ file:///C:/ci/debugpy_1637091961445/work
# decorator @ file:///home/conda/feedstock_root/build_artifacts/decorator_1641555617451/work
# dill==0.3.6
# distlib==0.3.6
# entrypoints==0.4
# et-xmlfile==1.1.0
# executing @ file:///home/conda/feedstock_root/build_artifacts/executing_1667317341051/work
# filelock==3.12.0
# fonttools==4.39.3
# frozenlist==1.3.3
# fsspec==2023.4.0
# gitdb==4.0.10
# GitPython==3.1.31
# huggingface-hub==0.13.4
huggingsound==0.1.6
# idna @ file:///home/linux1/recipes/ci/idna_1610986105248/work
# imageio==2.27.0
# importlib-metadata @ file:///home/conda/feedstock_root/build_artifacts/importlib-metadata_1679167925176/work
# importlib-resources==5.12.0
# install==1.3.5
# ipykernel @ file:///D:/bld/ipykernel_1655369313836/work
# ipython @ file:///D:/bld/ipython_1680185618122/work
# ipywidgets @ file:///home/conda/feedstock_root/build_artifacts/ipywidgets_1680023138361/work
# jedi @ file:///home/conda/feedstock_root/build_artifacts/jedi_1669134318875/work
# Jinja2==3.1.2
# jiwer==2.6.0
# joblib==1.2.0
# jsonschema==4.17.3
# jupyter-client @ file:///home/conda/feedstock_root/build_artifacts/jupyter_client_1679365123476/work
# jupyter-core @ file:///C:/b/abs_bd7elvu3w2/croot/jupyter_core_1676538600510/work
# jupyterlab-widgets @ file:///home/conda/feedstock_root/build_artifacts/jupyterlab_widgets_1680020489668/work
# kiwisolver==1.4.4
# lazy-loader==0.2
# librosa==0.9.2
# llvmlite==0.39.1
# markdown-it-py==2.2.0
# MarkupSafe==2.1.2
# matplotlib==3.7.1
# matplotlib-inline @ file:///home/conda/feedstock_root/build_artifacts/matplotlib-inline_1660814786464/work
# mdurl==0.1.2
# menuinst==1.4.16
# mpmath==1.3.0
# msgpack==1.0.5
# multidict==6.0.4
# multiprocess==0.70.14
# nest-asyncio @ file:///home/conda/feedstock_root/build_artifacts/nest-asyncio_1664684991461/work
# networkx==3.1
# numba==0.56.4
# numpy==1.23.5
# oauthlib==3.2.2
# # opencv-python==4.7.0.72
# openpyxl==3.1.2
# packaging @ file:///home/conda/feedstock_root/build_artifacts/packaging_1673482170163/work
# pandas==2.0.0
# parso @ file:///home/conda/feedstock_root/build_artifacts/parso_1638334955874/work
# pickleshare @ file:///home/conda/feedstock_root/build_artifacts/pickleshare_1602536217715/work
# Pillow==9.5.0
# pipenv==2023.4.20
# platformdirs @ file:///home/conda/feedstock_root/build_artifacts/platformdirs_1679871349196/work
# pooch==1.6.0
# prompt-toolkit @ file:///home/conda/feedstock_root/build_artifacts/prompt-toolkit_1677600924538/work
# protobuf==3.20.3
# psutil @ file:///C:/Windows/Temp/abs_b2c2fd7f-9fd5-4756-95ea-8aed74d0039flsd9qufz/croots/recipe/psutil_1656431277748/work
# pure-eval @ file:///home/conda/feedstock_root/build_artifacts/pure_eval_1642875951954/work
# pyarrow==11.0.0
# pyasn1==0.4.8
# pyasn1-modules==0.2.8
# pycosat==0.6.3
# pycparser @ file:///tmp/build/80754af9/pycparser_1594388511720/work
# pydeck==0.8.1b0
pydub==0.25.1
# Pygments @ file:///home/conda/feedstock_root/build_artifacts/pygments_1672682006896/work
# Pympler==1.0.1
# pyOpenSSL @ file:///tmp/build/80754af9/pyopenssl_1608057966937/work
# pyparsing==3.0.9
# pyrsistent==0.19.3
# PySocks @ file:///C:/ci/pysocks_1605307512533/work
# python-dateutil @ file:///home/conda/feedstock_root/build_artifacts/python-dateutil_1626286286081/work
# pytz==2023.3
# pytz-deprecation-shim==0.1.0.post0
# PyWavelets==1.4.1
# pywin32==228
# PyYAML==6.0
# pyzmq @ file:///C:/ci/pyzmq_1657615952984/work
# rapidfuzz==2.13.7
# regex==2023.3.23
# requests @ file:///tmp/build/80754af9/requests_1608241421344/work
# requests-oauthlib==1.3.1
# resampy==0.4.2
# responses==0.18.0
# rich==13.3.5
# rsa==4.9
# ruamel-yaml-conda @ file:///C:/ci/ruamel_yaml_1616016898638/work
# scikit-image==0.20.0
# scikit-learn==1.2.2
# scipy==1.9.1
# # seaborn==0.12.2
# six @ file:///tmp/build/80754af9/six_1623709665295/work
# # sklearn==0.0
# smmap==5.0.0
# soundfile==0.12.1
# soxr==0.3.5
# stack-data @ file:///home/conda/feedstock_root/build_artifacts/stack_data_1669632077133/work
streamlit==1.22.0
# sympy==1.11.1
# tenacity==8.2.2
# threadpoolctl==3.1.0
# tifffile==2023.3.21
# tokenizers==0.13.3
# toml==0.10.2
# toolz @ file:///home/conda/feedstock_root/build_artifacts/toolz_1657485559105/work
# # torch==1.12.1
# # torchaudio==2.0.1+cu118
# # torchvision==0.15.1+cu118
# tornado @ file:///D:/bld/tornado_1656937934674/work
# tqdm==4.65.0
# traitlets @ file:///home/conda/feedstock_root/build_artifacts/traitlets_1675110562325/work
transformers==4.28.1
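Of the 144 pinned lines, only huggingsound, pydub, streamlit, and transformers are active; the rest are commented out, and gradio (imported by app.py) is not listed. Below is a small illustrative check, assuming it runs inside the Space's environment, that the packages app.py relies on actually resolve.

# Sketch only: report installed versions of the packages app.py relies on.
import importlib.metadata as md

for pkg in ["gradio", "huggingsound", "pydub", "streamlit", "transformers"]:
    try:
        print(pkg, md.version(pkg))
    except md.PackageNotFoundError:
        print(pkg, "not installed")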
shaunakrecording.mp3
ADDED
Binary file (553 kB).
test_1.mp3
ADDED
Binary file (598 kB).
testaudio.mp3
ADDED
Binary file (127 kB).