ola13 committed on
Commit
e55d3fc
1 Parent(s): dfc4a0d

formatting

Browse files
Files changed (1) hide show
  1. app.py +68 -40
app.py CHANGED
@@ -14,31 +14,30 @@ import requests
14
  pp = pprint.PrettyPrinter(indent=2)
15
  st.set_page_config(page_title="Gaia Search", layout="wide")
16
 
17
- os.makedirs(os.path.join(os.getcwd(),".streamlit"), exist_ok = True)
18
- with open(os.path.join(os.getcwd(),".streamlit/config.toml"), "w") as file:
19
- file.write(
20
- '[theme]\nbase="light"'
21
- )
22
-
23
- LANG_MAPPING = {'Arabic':'ar',
24
- 'Catalan':'ca',
25
- 'Code':'code',
26
- 'English':'en',
27
- 'Spanish':'es',
28
- 'French':'fr',
29
- 'Indonesian':'id',
30
- 'Indic':'indic',
31
- 'Niger-Congo':'nigercongo',
32
- 'Portuguese': 'pt',
33
- 'Vietnamese': 'vi',
34
- 'Chinese': 'zh',
35
- 'Detect Language':'detect_language',
36
- 'All':'all'}
37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
 
40
  st.sidebar.markdown(
41
- """
42
  <style>
43
  .aligncenter {
44
  text-align: center;
@@ -49,11 +48,11 @@ st.sidebar.markdown(
49
  <p class="aligncenter">Gaia Search 🌖🌏</p>
50
  <p style="text-align: center;"> A search engine for the LAION large scale image caption corpora</p>
51
  """,
52
- unsafe_allow_html=True,
53
  )
54
 
55
  st.sidebar.markdown(
56
- """
57
  <style>
58
  .aligncenter {
59
  text-align: center;
@@ -68,14 +67,30 @@ st.sidebar.markdown(
68
  </a>
69
  </p>
70
  """,
71
- unsafe_allow_html=True,
72
  )
73
 
74
- query = st.sidebar.text_input(label='Search query', value='')
75
  language = st.sidebar.selectbox(
76
- 'Language',
77
- ('Arabic', 'Catalan', 'Code', 'English', 'Spanish', 'French', 'Indonesian', 'Indic', 'Niger-Congo', 'Portuguese', 'Vietnamese', 'Chinese', 'Detect Language', 'All'),
78
- index=3)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  max_results = st.sidebar.slider(
80
  "Maximum Number of Results",
81
  min_value=1,
@@ -84,7 +99,7 @@ max_results = st.sidebar.slider(
84
  value=10,
85
  help="Maximum Number of Documents to return",
86
  )
87
- footer="""<style>
88
  .footer {
89
  position: fixed;
90
  left: 0;
@@ -99,7 +114,7 @@ text-align: center;
99
  <p>Powered by <a href="https://huggingface.co/" >HuggingFace 🤗</a> and <a href="https://github.com/castorini/pyserini" >Pyserini 🦆</a></p>
100
  </div>
101
  """
102
- st.sidebar.markdown(footer,unsafe_allow_html=True)
103
 
104
 
105
  def scisearch(query, language, num_results=10):
@@ -144,32 +159,38 @@ def scisearch(query, language, num_results=10):
144
 
145
  return results, highlight_terms
146
 
 
147
  PII_TAGS = {"KEY", "EMAIL", "USER", "IP_ADDRESS", "ID", "IPv4", "IPv6"}
148
  PII_PREFIX = "PI:"
149
 
 
150
  def process_pii(text):
151
  for tag in PII_TAGS:
152
  text = text.replace(
153
  PII_PREFIX + tag,
154
- """<b><mark style="background: Fuchsia; color: Lime;">REDACTED {}</mark></b>""".format(tag),
155
- )
 
 
156
  return text
157
 
 
158
  def highlight_string(paragraph: str, highlight_terms: list) -> str:
159
  for term in highlight_terms:
160
  paragraph = re.sub(f"\\b{term}\\b", f"<b>{term}</b>", paragraph, flags=re.I)
161
  paragraph = process_pii(paragraph)
162
  return paragraph
163
 
 
164
  def process_results(hits: list, highlight_terms: list) -> str:
165
  hit_list = []
166
  for i, hit in enumerate(hits):
167
  res_head = f"""
168
  <div class="searchresult">
169
  <h2>{i+1}. Document ID: {hit['docid']}</h2>
170
- <p>Language: <string>{hit['lang']}</string>, Score: {round(hit['score'], 2)}</p>
171
  """
172
- for subhit in hit['meta']['docs']:
173
  res_head += f"""
174
  <button onclick="load_image({subhit['_id']})">Load Image</button><br>
175
  <p><img id='{subhit['_id']}' src='{subhit['URL']}' style="width:400px;height:auto;display:none;"></p>
@@ -195,16 +216,19 @@ if st.sidebar.button("Search"):
195
  {html_results}
196
  </div>
197
  """
198
- st.markdown("""
 
199
  <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/css/bootstrap.min.css" rel="stylesheet"
200
  integrity="sha384-EVSTQN3/azprG1Anm3QDgpJLIm9Nao0Yz1ztcQTwFspd3yD65VohhpuuCOmLASjC" crossorigin="anonymous">
201
  """,
202
- unsafe_allow_html=True)
 
203
  st.markdown(
204
  """
205
  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css">
206
  """,
207
- unsafe_allow_html=True)
 
208
  st.markdown(
209
  f"""
210
  <div class="row no-gutters mt-3 align-items-center">
@@ -219,7 +243,8 @@ if st.sidebar.button("Search"):
219
  </div>
220
  </div>
221
  """,
222
- unsafe_allow_html=True)
 
223
  components.html(
224
  """
225
  <style>
@@ -269,5 +294,8 @@ if st.sidebar.button("Search"):
269
  }
270
  </script>
271
  <button onclick="myFunction()">Toggle dark mode</button>
272
- """ + rendered_results, height=800, scrolling=True
273
- )
 
 
 
 
14
  pp = pprint.PrettyPrinter(indent=2)
15
  st.set_page_config(page_title="Gaia Search", layout="wide")
16
 
17
+ os.makedirs(os.path.join(os.getcwd(), ".streamlit"), exist_ok=True)
18
+ with open(os.path.join(os.getcwd(), ".streamlit/config.toml"), "w") as file:
19
+ file.write('[theme]\nbase="light"')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
+ LANG_MAPPING = {
22
+ "Arabic": "ar",
23
+ "Catalan": "ca",
24
+ "Code": "code",
25
+ "English": "en",
26
+ "Spanish": "es",
27
+ "French": "fr",
28
+ "Indonesian": "id",
29
+ "Indic": "indic",
30
+ "Niger-Congo": "nigercongo",
31
+ "Portuguese": "pt",
32
+ "Vietnamese": "vi",
33
+ "Chinese": "zh",
34
+ "Detect Language": "detect_language",
35
+ "All": "all",
36
+ }
37
 
38
 
39
  st.sidebar.markdown(
40
+ """
41
  <style>
42
  .aligncenter {
43
  text-align: center;
 
48
  <p class="aligncenter">Gaia Search 🌖🌏</p>
49
  <p style="text-align: center;"> A search engine for the LAION large scale image caption corpora</p>
50
  """,
51
+ unsafe_allow_html=True,
52
  )
53
 
54
  st.sidebar.markdown(
55
+ """
56
  <style>
57
  .aligncenter {
58
  text-align: center;
 
67
  </a>
68
  </p>
69
  """,
70
+ unsafe_allow_html=True,
71
  )
72
 
73
+ query = st.sidebar.text_input(label="Search query", value="")
74
  language = st.sidebar.selectbox(
75
+ "Language",
76
+ (
77
+ "Arabic",
78
+ "Catalan",
79
+ "Code",
80
+ "English",
81
+ "Spanish",
82
+ "French",
83
+ "Indonesian",
84
+ "Indic",
85
+ "Niger-Congo",
86
+ "Portuguese",
87
+ "Vietnamese",
88
+ "Chinese",
89
+ "Detect Language",
90
+ "All",
91
+ ),
92
+ index=3,
93
+ )
94
  max_results = st.sidebar.slider(
95
  "Maximum Number of Results",
96
  min_value=1,
 
99
  value=10,
100
  help="Maximum Number of Documents to return",
101
  )
102
+ footer = """<style>
103
  .footer {
104
  position: fixed;
105
  left: 0;
 
114
  <p>Powered by <a href="https://huggingface.co/" >HuggingFace 🤗</a> and <a href="https://github.com/castorini/pyserini" >Pyserini 🦆</a></p>
115
  </div>
116
  """
117
+ st.sidebar.markdown(footer, unsafe_allow_html=True)
118
 
119
 
120
  def scisearch(query, language, num_results=10):
 
159
 
160
  return results, highlight_terms
161
 
162
+
163
  PII_TAGS = {"KEY", "EMAIL", "USER", "IP_ADDRESS", "ID", "IPv4", "IPv6"}
164
  PII_PREFIX = "PI:"
165
 
166
+
167
  def process_pii(text):
168
  for tag in PII_TAGS:
169
  text = text.replace(
170
  PII_PREFIX + tag,
171
+ """<b><mark style="background: Fuchsia; color: Lime;">REDACTED {}</mark></b>""".format(
172
+ tag
173
+ ),
174
+ )
175
  return text
176
 
177
+
178
  def highlight_string(paragraph: str, highlight_terms: list) -> str:
179
  for term in highlight_terms:
180
  paragraph = re.sub(f"\\b{term}\\b", f"<b>{term}</b>", paragraph, flags=re.I)
181
  paragraph = process_pii(paragraph)
182
  return paragraph
183
 
184
+
185
  def process_results(hits: list, highlight_terms: list) -> str:
186
  hit_list = []
187
  for i, hit in enumerate(hits):
188
  res_head = f"""
189
  <div class="searchresult">
190
  <h2>{i+1}. Document ID: {hit['docid']}</h2>
191
+ <p>Language: <string>FIX MEEEE</string>, Score: {round(hit['score'], 2)}</p>
192
  """
193
+ for subhit in hit["meta"]["docs"]:
194
  res_head += f"""
195
  <button onclick="load_image({subhit['_id']})">Load Image</button><br>
196
  <p><img id='{subhit['_id']}' src='{subhit['URL']}' style="width:400px;height:auto;display:none;"></p>
 
216
  {html_results}
217
  </div>
218
  """
219
+ st.markdown(
220
+ """
221
  <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/css/bootstrap.min.css" rel="stylesheet"
222
  integrity="sha384-EVSTQN3/azprG1Anm3QDgpJLIm9Nao0Yz1ztcQTwFspd3yD65VohhpuuCOmLASjC" crossorigin="anonymous">
223
  """,
224
+ unsafe_allow_html=True,
225
+ )
226
  st.markdown(
227
  """
228
  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css">
229
  """,
230
+ unsafe_allow_html=True,
231
+ )
232
  st.markdown(
233
  f"""
234
  <div class="row no-gutters mt-3 align-items-center">
 
243
  </div>
244
  </div>
245
  """,
246
+ unsafe_allow_html=True,
247
+ )
248
  components.html(
249
  """
250
  <style>
 
294
  }
295
  </script>
296
  <button onclick="myFunction()">Toggle dark mode</button>
297
+ """
298
+ + rendered_results,
299
+ height=800,
300
+ scrolling=True,
301
+ )