Hrsh-Venket committed on
Commit
1c827e2
·
1 Parent(s): aaa69b0

new file: app.py

Browse files

new file: hrsh-test.mp3
new file: karanrecording.mp3
new file: requirements.txt
new file: shaunakrecording.mp3
new file: test_1.mp3
new file: testaudio.mp3

Files changed (7) hide show
  1. app.py +128 -0
  2. hrsh-test.mp3 +0 -0
  3. karanrecording.mp3 +0 -0
  4. requirements.txt +144 -0
  5. shaunakrecording.mp3 +0 -0
  6. test_1.mp3 +0 -0
  7. testaudio.mp3 +0 -0
app.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from huggingsound import SpeechRecognitionModel
3
+ from transformers import logging
4
+ from transformers import pipeline
5
+ from transformers import BertTokenizer, BertModel
6
+ from pydub import AudioSegment
7
+ unmasker = pipeline('fill-mask', model='bert-base-uncased')
8
+ tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
9
+ model = BertModel.from_pretrained("bert-base-uncased")
10
+ import os
11
+
12
+
13
def levenshtein_distance(s, t):
    """Return the Levenshtein (edit) distance between ``s`` and ``t``.

    The distance is the minimum number of single-item insertions,
    deletions and substitutions needed to turn one sequence into the
    other.

    Args:
        s: First sequence (typically a string).
        t: Second sequence (typically a string).

    Returns:
        The edit distance as a non-negative int; ``0`` when the two
        sequences are equal.
    """
    # The distance is symmetric, so keep the shorter sequence on the inner
    # axis: the two working rows are then as small as possible.
    if len(s) < len(t):
        s, t = t, s

    # Only the previous row of the dynamic-programming table is ever read,
    # so two rolling rows replace the full (m + 1) x (n + 1) matrix.
    previous = list(range(len(t) + 1))
    for i, s_item in enumerate(s, start=1):
        current = [i]
        for j, t_item in enumerate(t, start=1):
            if s_item == t_item:
                # Matching items cost nothing extra.
                current.append(previous[j - 1])
            else:
                # 1 + cheapest of deletion, insertion, substitution.
                current.append(
                    1 + min(previous[j], current[j - 1], previous[j - 1])
                )
        previous = current

    return previous[-1]
31
+
32
def collate(input):
    """Join word and punctuation tokens into a single display string.

    Words are separated by single spaces, punctuation is attached directly
    to the text before it, and the first word of the text and of each new
    sentence is capitalised.

    Args:
        input: Iterable of string tokens (words and punctuation marks).
            The name shadows the builtin but is kept for existing callers.

    Returns:
        The assembled text, without a leading space. An empty iterable
        yields ``""``.
    """
    punctuation = (
        ",", ".", "?", "!", ";", ":", "-", "\u2014",
        "(", ")", "[", "]", "{", "}", "'", "\"", "`",
    )
    # Marks after which the next word starts with a capital letter.
    capitalise_after = (".", "?", "!", "(")
    # Marks that bind the following word without a space ("well-known", "it's").
    joiners = ("-", "'")

    pieces = []
    capitalise_next = True
    glue_next = False
    for token in input:
        if token in punctuation:
            pieces.append(token)
            if token in capitalise_after:
                capitalise_next = True
            glue_next = token in joiners
            continue

        # No separator before the very first token or right after a joiner.
        if pieces and not glue_next:
            pieces.append(" ")
        pieces.append(token.capitalize() if capitalise_next else token)
        capitalise_next = False
        # A joiner only binds the single word that follows it.
        glue_next = False

    return "".join(pieces)
57
+
58
# Lazily created wav2vec2 recogniser, reused by every call to ``everything``
# so the model is constructed only once per process.
_W2V_MODEL = None


def everything(audio_paths):
    """Transcribe audio files with the wav2vec2 English speech model.

    Args:
        audio_paths: A list of audio file paths, or a single path
            (``str`` or ``os.PathLike``), which is wrapped in a list.

    Returns:
        The result of ``SpeechRecognitionModel.transcribe`` for the given
        paths (presumably one dict per file with a ``"transcription"``
        key -- confirm against the huggingsound documentation), or an
        empty list when no paths are given.
    """
    global _W2V_MODEL

    # Nothing to transcribe: skip the model load entirely.
    if not audio_paths:
        return []

    # A single path must be wrapped, otherwise it would be iterated
    # character by character.
    if isinstance(audio_paths, (str, os.PathLike)):
        audio_paths = [os.fspath(audio_paths)]

    # Lower the verbosity before the model is loaded.
    # Change 'error' to 'warning', or remove this call, to see the warnings.
    logging.set_verbosity_error()

    # https://huggingface.co/jonatasgrosman/wav2vec2-large-xlsr-53-english
    # https://huggingface.co/bert-base-uncased
    if _W2V_MODEL is None:
        _W2V_MODEL = SpeechRecognitionModel(
            "jonatasgrosman/wav2vec2-large-xlsr-53-english"
        )

    return _W2V_MODEL.transcribe(audio_paths)
67
+ # input = transcriptions[0]["transcription"]
68
+ # input = input.split()
69
+
70
+ # #(1) is a strategy where tokens are used to determine lexicographic distance
71
+ # #(2) is a strategy where replaced words are appended to a separate output list instead of overwriting input
72
+ # for t in range(1):
73
+ # # output = [] #(2)
74
+ # for i in range(len(input)):
75
+ # temp = input[i]
76
+ # token = tokenizer(temp)['input_ids'][1]
77
+ # input[i] = "[MASK]"
78
+ # apiint = unmasker(' '.join(input))
79
+ # dist = []
80
+ # for r in range(5):
81
+ # # if (np.abs((apiint[r]['token'] - token)) < 2): #(1)
82
+ # dist.append(levenshtein_distance(temp, apiint[r]['token_str']))
83
+ # lindex = 0
84
+ # l = dist[0]
85
+ # for r in range(5):
86
+ # if dist[r] < l:
87
+ # lindex = r
88
+
89
+ # l = dist[r]
90
+ # if l <= 2:
91
+ # input[i] = apiint[lindex]['token_str']
92
+ # # output.append(apiint[lindex]['token_str']) #(2)
93
+ # else:
94
+ # input[i] = temp
95
+ # # output.append(temp) #(2)
96
+ # # input[i] = temp #(2)
97
+
98
+ # for t in range(1):
99
+ # inndex = 1
100
+ # for i in range(len(input)):
101
+ # input.insert(inndex, "[MASK]")
102
+ # # print(' '.join(input))
103
+ # apiint = unmasker(' '.join(input))
104
+ # if (apiint[0]['token'] < 1500):
105
+ # input[inndex] = apiint[0]["token_str"]
106
+ # inndex += 2
107
+ # else:
108
+ # del input[inndex]
109
+ # inndex += 1
110
+
111
+ # st.write(collate(input))
112
+
113
+ # # In comparison, a plain autocorrect gives this output:
114
+
115
+ # # "The b-movie by Jerry Sinclair, the sound of buzzing
116
+ # # bees, can be heard according to all known laws of
117
+ # # aviation that is no way for b to be able to fly its
118
+ # # wings are too small to get its start little body off
119
+ # # the ground, the be, of course, flies anyway because ``
120
+ # # bees don't care what humans think is possible.
121
+ # # Barbuda is guaranteed one member of the House of
122
+ # # Representatives and two members of the Senate."
123
+
124
+ # # - https://huggingface.co/oliverguhr/spelling-correction-english-base?text=lets+do+a+comparsion
125
+
126
def _transcribe_upload(audio_path):
    """Gradio callback: transcribe one uploaded audio file to plain text.

    Args:
        audio_path: Path of the uploaded audio file as supplied by the
            ``gr.Audio(type="filepath")`` input; falsy when nothing was
            uploaded.

    Returns:
        The transcription text, one line per result, or ``""`` when no
        file was provided.
    """
    if not audio_path:
        return ""
    # ``everything`` takes a list of paths, so wrap the single upload.
    results = everything([audio_path])
    return "\n".join(result["transcription"] for result in results)


# Web UI: upload one audio file, get back its transcription as text.
demo = gr.Interface(
    fn=_transcribe_upload,
    inputs=[gr.Audio(type="filepath")],
    outputs=["text"],
)

if __name__ == "__main__":
    # Start the web server only when run as a script, not on import.
    demo.launch()
hrsh-test.mp3 ADDED
Binary file (109 kB). View file
 
karanrecording.mp3 ADDED
Binary file (70 kB). View file
 
requirements.txt ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # aiohttp==3.8.4
2
+ # aiosignal==1.3.1
3
+ # altair==4.2.2
4
+ # appdirs==1.4.4
5
+ # asttokens @ file:///home/conda/feedstock_root/build_artifacts/asttokens_1670263926556/work
6
+ # async-timeout==4.0.2
7
+ # attrs==23.1.0
8
+ # audioread==3.0.0
9
+ # backcall @ file:///home/conda/feedstock_root/build_artifacts/backcall_1592338393461/work
10
+ # backports.functools-lru-cache @ file:///home/conda/feedstock_root/build_artifacts/backports.functools_lru_cache_1618230623929/work
11
+ # blinker==1.6.2
12
+ # # blur-detector==0.0.6
13
+ # brotlipy==0.7.0
14
+ # cachetools==5.3.0
15
+ # certifi==2022.12.7
16
+ # cffi @ file:///C:/ci/cffi_1625831756778/work
17
+ # chardet @ file:///C:/ci/chardet_1607706937985/work
18
+ # charset-normalizer==3.1.0
19
+ # click==8.1.3
20
+ # colorama @ file:///home/conda/feedstock_root/build_artifacts/colorama_1666700638685/work
21
+ # conda==4.14.0
22
+ # conda-package-handling @ file:///C:/ci/conda-package-handling_1618262410900/work
23
+ # contourpy==1.0.7
24
+ # cryptography @ file:///C:/ci/cryptography_1616769504165/work
25
+ # cycler==0.11.0
26
+ # datasets==2.11.0
27
+ # debugpy @ file:///C:/ci/debugpy_1637091961445/work
28
+ # decorator @ file:///home/conda/feedstock_root/build_artifacts/decorator_1641555617451/work
29
+ # dill==0.3.6
30
+ # distlib==0.3.6
31
+ # entrypoints==0.4
32
+ # et-xmlfile==1.1.0
33
+ # executing @ file:///home/conda/feedstock_root/build_artifacts/executing_1667317341051/work
34
+ # filelock==3.12.0
35
+ # fonttools==4.39.3
36
+ # frozenlist==1.3.3
37
+ # fsspec==2023.4.0
38
+ # gitdb==4.0.10
39
+ # GitPython==3.1.31
40
+ # huggingface-hub==0.13.4
41
+ huggingsound==0.1.6
42
+ # idna @ file:///home/linux1/recipes/ci/idna_1610986105248/work
43
+ # imageio==2.27.0
44
+ # importlib-metadata @ file:///home/conda/feedstock_root/build_artifacts/importlib-metadata_1679167925176/work
45
+ # importlib-resources==5.12.0
46
+ # install==1.3.5
47
+ # ipykernel @ file:///D:/bld/ipykernel_1655369313836/work
48
+ # ipython @ file:///D:/bld/ipython_1680185618122/work
49
+ # ipywidgets @ file:///home/conda/feedstock_root/build_artifacts/ipywidgets_1680023138361/work
50
+ # jedi @ file:///home/conda/feedstock_root/build_artifacts/jedi_1669134318875/work
51
+ # Jinja2==3.1.2
52
+ # jiwer==2.6.0
53
+ # joblib==1.2.0
54
+ # jsonschema==4.17.3
55
+ # jupyter-client @ file:///home/conda/feedstock_root/build_artifacts/jupyter_client_1679365123476/work
56
+ # jupyter-core @ file:///C:/b/abs_bd7elvu3w2/croot/jupyter_core_1676538600510/work
57
+ # jupyterlab-widgets @ file:///home/conda/feedstock_root/build_artifacts/jupyterlab_widgets_1680020489668/work
58
+ # kiwisolver==1.4.4
59
+ # lazy-loader==0.2
60
+ # librosa==0.9.2
61
+ # llvmlite==0.39.1
62
+ # markdown-it-py==2.2.0
63
+ # MarkupSafe==2.1.2
64
+ # matplotlib==3.7.1
65
+ # matplotlib-inline @ file:///home/conda/feedstock_root/build_artifacts/matplotlib-inline_1660814786464/work
66
+ # mdurl==0.1.2
67
+ # menuinst==1.4.16
68
+ # mpmath==1.3.0
69
+ # msgpack==1.0.5
70
+ # multidict==6.0.4
71
+ # multiprocess==0.70.14
72
+ # nest-asyncio @ file:///home/conda/feedstock_root/build_artifacts/nest-asyncio_1664684991461/work
73
+ # networkx==3.1
74
+ # numba==0.56.4
75
+ # numpy==1.23.5
76
+ # oauthlib==3.2.2
77
+ # # opencv-python==4.7.0.72
78
+ # openpyxl==3.1.2
79
+ # packaging @ file:///home/conda/feedstock_root/build_artifacts/packaging_1673482170163/work
80
+ # pandas==2.0.0
81
+ # parso @ file:///home/conda/feedstock_root/build_artifacts/parso_1638334955874/work
82
+ # pickleshare @ file:///home/conda/feedstock_root/build_artifacts/pickleshare_1602536217715/work
83
+ # Pillow==9.5.0
84
+ # pipenv==2023.4.20
85
+ # platformdirs @ file:///home/conda/feedstock_root/build_artifacts/platformdirs_1679871349196/work
86
+ # pooch==1.6.0
87
+ # prompt-toolkit @ file:///home/conda/feedstock_root/build_artifacts/prompt-toolkit_1677600924538/work
88
+ # protobuf==3.20.3
89
+ # psutil @ file:///C:/Windows/Temp/abs_b2c2fd7f-9fd5-4756-95ea-8aed74d0039flsd9qufz/croots/recipe/psutil_1656431277748/work
90
+ # pure-eval @ file:///home/conda/feedstock_root/build_artifacts/pure_eval_1642875951954/work
91
+ # pyarrow==11.0.0
92
+ # pyasn1==0.4.8
93
+ # pyasn1-modules==0.2.8
94
+ # pycosat==0.6.3
95
+ # pycparser @ file:///tmp/build/80754af9/pycparser_1594388511720/work
96
+ # pydeck==0.8.1b0
97
+ pydub==0.25.1
98
+ # Pygments @ file:///home/conda/feedstock_root/build_artifacts/pygments_1672682006896/work
99
+ # Pympler==1.0.1
100
+ # pyOpenSSL @ file:///tmp/build/80754af9/pyopenssl_1608057966937/work
101
+ # pyparsing==3.0.9
102
+ # pyrsistent==0.19.3
103
+ # PySocks @ file:///C:/ci/pysocks_1605307512533/work
104
+ # python-dateutil @ file:///home/conda/feedstock_root/build_artifacts/python-dateutil_1626286286081/work
105
+ # pytz==2023.3
106
+ # pytz-deprecation-shim==0.1.0.post0
107
+ # PyWavelets==1.4.1
108
+ # pywin32==228
109
+ # PyYAML==6.0
110
+ # pyzmq @ file:///C:/ci/pyzmq_1657615952984/work
111
+ # rapidfuzz==2.13.7
112
+ # regex==2023.3.23
113
+ # requests @ file:///tmp/build/80754af9/requests_1608241421344/work
114
+ # requests-oauthlib==1.3.1
115
+ # resampy==0.4.2
116
+ # responses==0.18.0
117
+ # rich==13.3.5
118
+ # rsa==4.9
119
+ # ruamel-yaml-conda @ file:///C:/ci/ruamel_yaml_1616016898638/work
120
+ # scikit-image==0.20.0
121
+ # scikit-learn==1.2.2
122
+ # scipy==1.9.1
123
+ # # seaborn==0.12.2
124
+ # six @ file:///tmp/build/80754af9/six_1623709665295/work
125
+ # # sklearn==0.0
126
+ # smmap==5.0.0
127
+ # soundfile==0.12.1
128
+ # soxr==0.3.5
129
+ # stack-data @ file:///home/conda/feedstock_root/build_artifacts/stack_data_1669632077133/work
130
+ streamlit==1.22.0
131
+ # sympy==1.11.1
132
+ # tenacity==8.2.2
133
+ # threadpoolctl==3.1.0
134
+ # tifffile==2023.3.21
135
+ # tokenizers==0.13.3
136
+ # toml==0.10.2
137
+ # toolz @ file:///home/conda/feedstock_root/build_artifacts/toolz_1657485559105/work
138
+ # # torch==1.12.1
139
+ # # torchaudio==2.0.1+cu118
140
+ # # torchvision==0.15.1+cu118
141
+ # tornado @ file:///D:/bld/tornado_1656937934674/work
142
+ # tqdm==4.65.0
143
+ # traitlets @ file:///home/conda/feedstock_root/build_artifacts/traitlets_1675110562325/work
144
+ transformers==4.28.1
shaunakrecording.mp3 ADDED
Binary file (553 kB). View file
 
test_1.mp3 ADDED
Binary file (598 kB). View file
 
testaudio.mp3 ADDED
Binary file (127 kB). View file