ugaray96 committed on
Commit
710a34d
1 Parent(s): 687697c

Adds audio to text converter and fixes tfidf

Browse files
app.py CHANGED
@@ -11,12 +11,16 @@ st.set_page_config(
11
  from streamlit_option_menu import option_menu
12
  from interface.config import session_state_variables, pages
13
  from interface.components import component_select_pipeline
 
14
 
15
  # Initialization of session state
16
  for key, value in session_state_variables.items():
17
  if key not in st.session_state:
18
  st.session_state[key] = value
19
 
 
 
 
20
 
21
  def run_demo():
22
 
 
11
  from streamlit_option_menu import option_menu
12
  from interface.config import session_state_variables, pages
13
  from interface.components import component_select_pipeline
14
+ from interface.utils import load_audio_model
15
 
16
  # Initialization of session state
17
  for key, value in session_state_variables.items():
18
  if key not in st.session_state:
19
  st.session_state[key] = value
20
 
21
+ # Init audio model
22
+ st.session_state["audio_model"] = load_audio_model()
23
+
24
 
25
  def run_demo():
26
 
core/audio.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import whisper
2
+ import pydub
3
+ import os
4
+
5
+ whisper_model = "medium"
6
+
7
+
8
def load_model():
    """Load and return the Whisper model named by ``whisper_model``.

    Prints a short progress message to stdout before loading.
    """
    print("Loading audio model...")
    model = whisper.load_model(whisper_model)
    return model
11
+
12
+
13
def audio_to_text(model, audio_file):
    """Convert an audio file to text using a loaded Whisper model.

    The input is decoded with pydub, re-exported to a temporary file on disk,
    and that file is then loaded and decoded by Whisper.

    Args:
        model: A loaded Whisper model (e.g. the value returned by
            ``load_model``); its ``device`` attribute is used to place the
            spectrogram.
        audio_file: Path or file-like object accepted by
            ``pydub.AudioSegment.from_file``.

    Returns:
        The decoded text (``result.text``).
    """
    # Function-scope import so the block does not depend on a file-level
    # import that the module does not have yet.
    import tempfile

    segment = pydub.AudioSegment.from_file(audio_file)

    # Use a unique temporary path instead of a fixed "audio_tmp" in the
    # working directory: two concurrent calls would otherwise overwrite or
    # delete each other's file.
    fd, tmp_path = tempfile.mkstemp(prefix="audio_tmp_")
    os.close(fd)
    try:
        # export() returns the file object it wrote to; close it so the data
        # is flushed and the handle is not leaked (an open handle also makes
        # os.remove fail on Windows).
        segment.export(tmp_path).close()

        audio = whisper.load_audio(tmp_path)
        # NOTE(review): pad_or_trim fits the audio to Whisper's fixed input
        # window (30 s per the Whisper README), so longer recordings appear
        # to be truncated here -- confirm whether model.transcribe() should
        # be used for long inputs.
        audio = whisper.pad_or_trim(audio)
        mel = whisper.log_mel_spectrogram(audio).to(model.device)
        options = whisper.DecodingOptions()
        result = whisper.decode(model, mel, options)
    finally:
        # Always clean up the temporary file, even if export/decoding fails.
        os.remove(tmp_path)
    return result.text
core/pipelines.py CHANGED
@@ -85,9 +85,7 @@ def dense_passage_retrieval(
85
  - One BERT base model to encode queries
86
  - Ranking of documents done by dot product similarity between query and document embeddings
87
  """
88
- global document_store
89
- if index != document_store.index:
90
- document_store = InMemoryDocumentStore(index=index)
91
  dpr_retriever = DensePassageRetriever(
92
  document_store=document_store,
93
  query_embedding_model=query_embedding_model,
 
85
  - One BERT base model to encode queries
86
  - Ranking of documents done by dot product similarity between query and document embeddings
87
  """
88
+ document_store = InMemoryDocumentStore(index=index)
 
 
89
  dpr_retriever = DensePassageRetriever(
90
  document_store=document_store,
91
  query_embedding_model=query_embedding_model,
interface/components.py CHANGED
@@ -51,6 +51,19 @@ def component_select_pipeline(container):
51
  "doc": pipeline_funcs[index_pipe].__doc__,
52
  }
53
  reset_vars_data()
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
 
56
  def component_show_pipeline(pipeline, pipeline_name):
@@ -126,7 +139,7 @@ def component_file_input(container, doc_id):
126
  with st.expander("Enter Files"):
127
  while True:
128
  file = st.file_uploader(
129
- "Upload a .txt, .pdf, .csv, image file", key=doc_id
130
  )
131
  if file != None:
132
  extracted_text = extract_text_from_file(file)
 
51
  "doc": pipeline_funcs[index_pipe].__doc__,
52
  }
53
  reset_vars_data()
54
+ # TODO: Use elasticsearch and remove this workaround for TFIDF
55
+ # Reload if Keyword Search is selected
56
+ elif st.session_state["pipeline"]["name"] == "Keyword Search":
57
+ st.session_state["pipeline_func_parameters"] = pipeline_func_parameters
58
+ (search_pipeline, index_pipeline,) = pipeline_funcs[
59
+ index_pipe
60
+ ](**pipeline_func_parameters[index_pipe])
61
+ st.session_state["pipeline"] = {
62
+ "name": selected_pipeline,
63
+ "search_pipeline": search_pipeline,
64
+ "index_pipeline": index_pipeline,
65
+ "doc": pipeline_funcs[index_pipe].__doc__,
66
+ }
67
 
68
 
69
  def component_show_pipeline(pipeline, pipeline_name):
 
139
  with st.expander("Enter Files"):
140
  while True:
141
  file = st.file_uploader(
142
+ "Upload a .txt, .pdf, .csv, image file, audio file", key=doc_id
143
  )
144
  if file != None:
145
  extracted_text = extract_text_from_file(file)
interface/utils.py CHANGED
@@ -3,6 +3,7 @@ import os
3
  import shutil
4
  import core.pipelines as pipelines_functions
5
  from core.pipelines import data_path
 
6
  from inspect import getmembers, isfunction, signature
7
  from newspaper import Article
8
  from PyPDF2 import PdfFileReader
@@ -96,9 +97,19 @@ def extract_text_from_file(file):
96
  return file_text
97
 
98
  # read image file (OCR)
99
- elif file.type == "image/jpeg":
100
  return pytesseract.image_to_string(Image.open(file))
101
 
 
 
 
 
 
102
  else:
103
  st.warning(f"File type {file.type} not supported")
104
  return None
 
 
 
 
 
 
3
  import shutil
4
  import core.pipelines as pipelines_functions
5
  from core.pipelines import data_path
6
+ from core.audio import audio_to_text, load_model
7
  from inspect import getmembers, isfunction, signature
8
  from newspaper import Article
9
  from PyPDF2 import PdfFileReader
 
97
  return file_text
98
 
99
  # read image file (OCR)
100
+ elif file.type in ["image/jpeg", "image/png"]:
101
  return pytesseract.image_to_string(Image.open(file))
102
 
103
+ # read audio file (AudioToText)
104
+ elif file.type in ["audio/mpeg", "audio/wav", "audio/aac", "audio/x-m4a"]:
105
+ text = audio_to_text(st.session_state["audio_model"], file)
106
+ return text
107
+
108
  else:
109
  st.warning(f"File type {file.type} not supported")
110
  return None
111
+
112
+
113
@st.experimental_singleton
def load_audio_model():
    """Return the audio-to-text model produced by ``load_model``.

    Decorated with ``st.experimental_singleton``; presumably so the model is
    loaded once and reused rather than reloaded on every call -- see the
    Streamlit documentation for the exact caching semantics.
    """
    audio_model = load_model()
    return audio_model
requirements.txt CHANGED
@@ -9,4 +9,5 @@ pytesseract==0.3.10
9
  soundfile==0.10.3.post1
10
  espnet
11
  pydub==0.25.1
12
- espnet_model_zoo==0.1.7
 
 
9
  soundfile==0.10.3.post1
10
  espnet
11
  pydub==0.25.1
12
+ espnet_model_zoo==0.1.7
13
+ git+https://github.com/openai/whisper.git