NotShrirang committed on
Commit
fe5df96
·
1 Parent(s): 8ed49ee

feat: add audio data input

Browse files
Files changed (1) hide show
  1. data_search/data_search_page.py +18 -4
data_search/data_search_page.py CHANGED
@@ -6,13 +6,13 @@ import streamlit as st
6
  import sys
7
  import torch
8
  from vectordb import search_image_index, search_text_index, search_image_index_with_image, search_text_index_with_image
9
- from utils import load_image_index, load_text_index, get_local_files
10
  from data_search import adapter_utils
11
 
12
  sys.path.append(os.path.dirname(os.path.abspath(__file__)))
13
 
14
 
15
- def data_search(clip_model, preprocess, text_embedding_model, device):
16
 
17
  @st.cache_resource
18
  def load_finetuned_model(file_name):
@@ -68,6 +68,8 @@ def data_search(clip_model, preprocess, text_embedding_model, device):
68
  image_index, image_data = load_image_index()
69
  if os.path.exists("./vectorstore/text_index.index"):
70
  text_index, text_data = load_text_index()
 
 
71
  with torch.no_grad():
72
  if not os.path.exists("./vectorstore/image_data.csv"):
73
  st.warning("No Image Index Found. So not searching for images.")
@@ -75,6 +77,8 @@ def data_search(clip_model, preprocess, text_embedding_model, device):
75
  if not os.path.exists("./vectorstore/text_data.csv"):
76
  st.warning("No Text Index Found. So not searching for text.")
77
  text_index = None
 
 
78
  if image_input:
79
  image = Image.open(image_input)
80
  image = preprocess(image).unsqueeze(0).to(device)
@@ -85,12 +89,16 @@ def data_search(clip_model, preprocess, text_embedding_model, device):
85
  image_indices = search_image_index_with_image(image_features, image_index, clip_model, k=3)
86
  if text_index is not None:
87
  text_indices = search_text_index_with_image(adapted_text_embeddings, text_index, text_embedding_model, k=3)
 
 
88
  else:
89
  if image_index is not None:
90
  image_indices = search_image_index(text_input, image_index, clip_model, k=3)
91
  if text_index is not None:
92
  text_indices = search_text_index(text_input, text_index, text_embedding_model, k=3)
93
- if not image_index and not text_index:
 
 
94
  st.error("No Data Found! Please add data to the database.")
95
  st.subheader("Top 3 Results")
96
  cols = st.columns(3)
@@ -111,4 +119,10 @@ def data_search(clip_model, preprocess, text_embedding_model, device):
111
  with cols[i]:
112
  if text_index:
113
  text_content = text_data['content'].iloc[text_indices[0][i]]
114
- st.write(text_content)
 
 
 
 
 
 
 
6
  import sys
7
  import torch
8
  from vectordb import search_image_index, search_text_index, search_image_index_with_image, search_text_index_with_image
9
+ from utils import load_image_index, load_text_index, load_audio_index, get_local_files
10
  from data_search import adapter_utils
11
 
12
  sys.path.append(os.path.dirname(os.path.abspath(__file__)))
13
 
14
 
15
+ def data_search(clip_model, preprocess, text_embedding_model, whisper_model, device):
16
 
17
  @st.cache_resource
18
  def load_finetuned_model(file_name):
 
68
  image_index, image_data = load_image_index()
69
  if os.path.exists("./vectorstore/text_index.index"):
70
  text_index, text_data = load_text_index()
71
+ if os.path.exists("./vectorstore/audio_index.index"):
72
+ audio_index, audio_data = load_audio_index()
73
  with torch.no_grad():
74
  if not os.path.exists("./vectorstore/image_data.csv"):
75
  st.warning("No Image Index Found. So not searching for images.")
 
77
  if not os.path.exists("./vectorstore/text_data.csv"):
78
  st.warning("No Text Index Found. So not searching for text.")
79
  text_index = None
80
+ if not os.path.exists("./vectorstore/audio_data.csv"):
81
+ st.warning("No Audio Index Found. So not searching for audio.")
82
  if image_input:
83
  image = Image.open(image_input)
84
  image = preprocess(image).unsqueeze(0).to(device)
 
89
  image_indices = search_image_index_with_image(image_features, image_index, clip_model, k=3)
90
  if text_index is not None:
91
  text_indices = search_text_index_with_image(adapted_text_embeddings, text_index, text_embedding_model, k=3)
92
+ if audio_index is not None:
93
+ audio_indices = search_text_index_with_image(adapted_text_embeddings, audio_index, text_embedding_model, k=3)
94
  else:
95
  if image_index is not None:
96
  image_indices = search_image_index(text_input, image_index, clip_model, k=3)
97
  if text_index is not None:
98
  text_indices = search_text_index(text_input, text_index, text_embedding_model, k=3)
99
+ if audio_index is not None:
100
+ audio_indices = search_text_index(text_input, audio_index, text_embedding_model, k=3)
101
+ if not image_index and not text_index and not audio_index:
102
  st.error("No Data Found! Please add data to the database.")
103
  st.subheader("Top 3 Results")
104
  cols = st.columns(3)
 
119
  with cols[i]:
120
  if text_index:
121
  text_content = text_data['content'].iloc[text_indices[0][i]]
122
+ st.write(text_content)
123
+ cols = st.columns(3)
124
+ for i in range(3):
125
+ with cols[i]:
126
+ if audio_index:
127
+ audio_path = audio_data['path'].iloc[audio_indices[0][i]]
128
+ st.audio(audio_path)