Soumen committed on
Commit
c75cc74
1 Parent(s): d05f84f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -10
app.py CHANGED
@@ -25,8 +25,13 @@ import os
25
  #os.system('gunzip ben.traineddata.gz ')
26
  #os.system('sudo mv -v ben.traineddata /usr/local/share/tessdata/')
27
  #os.system('pip install -q pytesseract')
28
- import streamlit as st
29
- import os
 
 
 
 
 
30
  import torch
31
  from transformers import AutoTokenizer, AutoModelWithLMHead, GPT2LMHeadModel
32
 
@@ -64,28 +69,99 @@ def entity_analyzer(my_text):
64
  entities = [(entity.text,entity.label_)for entity in docx.ents]
65
  allData = ['"Token":{},\n"Entities":{}'.format(tokens,entities)]
66
  return allData
67
-
68
-
69
  def main():
70
  """ NLP Based App with Streamlit """
71
  st.markdown("""
72
  #### Description
73
  This is a Natural Language Processing(NLP) Based App useful for basic NLP task
74
  NER,Sentiment, Spell Corrections and Summarization
75
- """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  # Entity Extraction
77
- message = st.text_area("type here ..")
78
  if st.checkbox("Show Named Entities"):
79
  st.subheader("Analyze Your Text")
80
  if st.button("Extract"):
81
- entity_result = entity_analyzer(message)
82
  st.json(entity_result)
83
 
84
  # Sentiment Analysis
85
  if st.checkbox("Show Sentiment Analysis"):
86
  st.subheader("Analyse Your Text")
87
  if st.button("Analyze"):
88
- blob = TextBlob(message)
89
  result_sentiment = blob.sentiment
90
  st.success(result_sentiment)
91
  #Text Corrections
@@ -93,13 +169,13 @@ def main():
93
  st.subheader("Correct Your Text")
94
  if st.button("Spell Corrections"):
95
  st.text("Using TextBlob ..")
96
- st.success(TextBlob(message).correct())
97
  if st.checkbox("Text Generation"):
98
  st.subheader("Generate Text")
99
  ok = st.button("Generate")
100
  tokenizer, model = load_models()
101
  if ok:
102
- input_ids = tokenizer(message, return_tensors='pt').input_ids
103
  st.text("Using Hugging Face Transformer, Contrastive Search ..")
104
  output = model.generate(input_ids, max_length=128)
105
  st.success(tokenizer.decode(output[0], skip_special_tokens=True))
 
25
  #os.system('gunzip ben.traineddata.gz ')
26
  #os.system('sudo mv -v ben.traineddata /usr/local/share/tessdata/')
27
  #os.system('pip install -q pytesseract')
28
+ import streamlit as st
29
+ import websockets
30
+ import pyaudio
31
+ from configure import api_key
32
+ import json
33
+ import asyncio
34
+
35
  import torch
36
  from transformers import AutoTokenizer, AutoModelWithLMHead, GPT2LMHeadModel
37
 
 
69
  entities = [(entity.text,entity.label_)for entity in docx.ents]
70
  allData = ['"Token":{},\n"Entities":{}'.format(tokens,entities)]
71
  return allData
 
 
72
  def main():
73
  """ NLP Based App with Streamlit """
74
  st.markdown("""
75
  #### Description
76
  This is a Natural Language Processing(NLP) Based App useful for basic NLP task
77
  NER,Sentiment, Spell Corrections and Summarization
78
+ """)
79
+ if "text" not in st.session_state:
80
+ st.session_state["text"] = ""
81
+ st.session_state["run"] = False
82
+ def start_listening():
83
+ st.session_state["run"] = True
84
+ st.button("Say something", on_click=start_listening)
85
+ text = st.text_input("What should I create?", value=st.session_state["text"])
86
+ URL = "wss://api.assemblyai.com/v2/realtime/ws?sample_rate=16000"
87
+ FRAMES_PER_BUFFER = 3200
88
+ FORMAT = pyaudio.paInt16
89
+ CHANNELS = 1
90
+ RATE = 16000
91
+ p = pyaudio.PyAudio()
92
+ # starts recording
93
+ stream = p.open(
94
+ format=FORMAT,
95
+ channels=CHANNELS,
96
+ rate=RATE,
97
+ input=True,
98
+ frames_per_buffer=FRAMES_PER_BUFFER
99
+ )
100
+ async def send_receive():
101
+ print(f'Connecting websocket to url ${URL}')
102
+ async with websockets.connect(
103
+ URL,
104
+ extra_headers=(("Authorization", api_key),),
105
+ ping_interval=5,
106
+ ping_timeout=20
107
+ ) as _ws:
108
+
109
+ r = await asyncio.sleep(0.1)
110
+ print("Receiving Session begins ...")
111
+
112
+ session_begins = await _ws.recv()
113
+
114
+ async def send():
115
+ while st.session_state['run']:
116
+ try:
117
+ data = stream.read(FRAMES_PER_BUFFER)
118
+ data = base64.b64encode(data).decode("utf-8")
119
+ json_data = json.dumps({"audio_data":str(data)})
120
+ r = await _ws.send(json_data)
121
+ except websockets.exceptions.ConnectionClosedError as e:
122
+ print(e)
123
+ assert e.code == 4008
124
+ break
125
+ except Exception as e:
126
+ print(e)
127
+ assert False, "Not a websocket 4008 error"
128
+
129
+ r = await asyncio.sleep(0.01)
130
+
131
+
132
+ async def receive():
133
+ while st.session_state['run']:
134
+ try:
135
+ result_str = await _ws.recv()
136
+ result = json.loads(result_str)['text']
137
+
138
+ if json.loads(result_str)['message_type'] == 'FinalTranscript':
139
+ result = result.replace('.', '')
140
+ result = result.replace('!', '')
141
+ st.session_state['text'] = result
142
+ st.session_state['run'] = False
143
+ st.experimental_rerun()
144
+ except websockets.exceptions.ConnectionClosedError as e:
145
+ print(e)
146
+ assert e.code == 4008
147
+ break
148
+ except Exception as e:
149
+ print(e)
150
+ assert False, "Not a websocket 4008 error"
151
+
152
+ send_result, receive_result = await asyncio.gather(send(), receive())
153
  # Entity Extraction
 
154
  if st.checkbox("Show Named Entities"):
155
  st.subheader("Analyze Your Text")
156
  if st.button("Extract"):
157
+ entity_result = entity_analyzer(text)
158
  st.json(entity_result)
159
 
160
  # Sentiment Analysis
161
  if st.checkbox("Show Sentiment Analysis"):
162
  st.subheader("Analyse Your Text")
163
  if st.button("Analyze"):
164
+ blob = TextBlob(text)
165
  result_sentiment = blob.sentiment
166
  st.success(result_sentiment)
167
  #Text Corrections
 
169
  st.subheader("Correct Your Text")
170
  if st.button("Spell Corrections"):
171
  st.text("Using TextBlob ..")
172
+ st.success(TextBlob(text).correct())
173
  if st.checkbox("Text Generation"):
174
  st.subheader("Generate Text")
175
  ok = st.button("Generate")
176
  tokenizer, model = load_models()
177
  if ok:
178
+ input_ids = tokenizer(text, return_tensors='pt').input_ids
179
  st.text("Using Hugging Face Transformer, Contrastive Search ..")
180
  output = model.generate(input_ids, max_length=128)
181
  st.success(tokenizer.decode(output[0], skip_special_tokens=True))