davanstrien HF staff committed on
Commit
84bfe38
1 Parent(s): 85ef5ed

add results number slider

Browse files
Files changed (1) hide show
  1. app.py +34 -16
app.py CHANGED
@@ -1,10 +1,11 @@
1
- import gradio as gr
2
- from qdrant_client import QdrantClient
3
- from qdrant_client import models
4
- from sentence_transformers import SentenceTransformer
5
- from dotenv import load_dotenv
6
  import os
7
  from functools import lru_cache
 
 
 
 
 
 
8
 
9
  load_dotenv()
10
 
@@ -22,25 +23,31 @@ client = QdrantClient(
22
 
23
 
24
  def format_results(results):
25
- markdown = ""
 
 
 
26
  for result in results:
27
  hub_id = result.payload["id"]
 
28
  url = f"https://huggingface.co/datasets/{hub_id}"
29
  header = f"## [{hub_id}]({url})"
30
  markdown += header + "\n"
31
- markdown += result.payload["section_text"] + "\n"
 
 
32
  return markdown
33
 
34
 
35
  @lru_cache(maxsize=100_000)
36
- def search(query: str):
37
  query_ = sentence_embedding_model.encode(
38
  f"Represent this sentence for searching relevant passages:{query}"
39
  )
40
  results = client.search(
41
  collection_name="dataset_cards",
42
  query_vector=query_,
43
- limit=10,
44
  )
45
  return format_results(results)
46
 
@@ -68,17 +75,19 @@ def hub_id_qdrant_id(hub_id):
68
 
69
 
70
  @lru_cache()
71
- def recommend(hub_id):
72
  positive_id = hub_id_qdrant_id(hub_id)
73
- results = client.recommend(collection_name=collection_name, positive=[positive_id])
 
 
74
  return format_results(results)
75
 
76
 
77
- def query(search_term, search_type):
78
  if search_type == "Recommend similar datasets":
79
- return recommend(search_term)
80
  else:
81
- return search(search_term)
82
 
83
 
84
  with gr.Blocks() as demo:
@@ -94,6 +103,7 @@ with gr.Blocks() as demo:
94
  value="movie review sentiment",
95
  label="hub id i.e. IMDB or query i.e. movie review sentiment",
96
  )
 
97
  with gr.Row():
98
  with gr.Row():
99
  find_similar_btn = gr.Button("Search")
@@ -103,9 +113,17 @@ with gr.Blocks() as demo:
103
  value="Semantic Search",
104
  interactive=True,
105
  )
106
-
 
 
 
 
 
 
 
 
107
  results = gr.Markdown()
108
- find_similar_btn.click(query, [search_term, search_type], results)
109
 
110
 
111
  demo.launch()
 
 
 
 
 
 
1
  import os
2
  from functools import lru_cache
3
+ from typing import Optional
4
+
5
+ import gradio as gr
6
+ from dotenv import load_dotenv
7
+ from qdrant_client import QdrantClient, models
8
+ from sentence_transformers import SentenceTransformer
9
 
10
  load_dotenv()
11
 
 
23
 
24
 
25
  def format_results(results):
26
+ markdown = (
27
+ "<h1 style='text-align: center;'> &#x2728; Dataset Search Results &#x2728;"
28
+ " </h1> \n\n"
29
+ )
30
  for result in results:
31
  hub_id = result.payload["id"]
32
+ download_number = result.payload["downloads"]
33
  url = f"https://huggingface.co/datasets/{hub_id}"
34
  header = f"## [{hub_id}]({url})"
35
  markdown += header + "\n"
36
+ markdown += f"**Downloads:** {download_number}\n\n"
37
+ markdown += f"{result.payload['section_text']} \n"
38
+
39
  return markdown
40
 
41
 
42
  @lru_cache(maxsize=100_000)
43
+ def search(query: str, limit: Optional[int] = 10):
44
  query_ = sentence_embedding_model.encode(
45
  f"Represent this sentence for searching relevant passages:{query}"
46
  )
47
  results = client.search(
48
  collection_name="dataset_cards",
49
  query_vector=query_,
50
+ limit=limit,
51
  )
52
  return format_results(results)
53
 
 
75
 
76
 
77
  @lru_cache()
78
+ def recommend(hub_id, limit: Optional[int] = 10):
79
  positive_id = hub_id_qdrant_id(hub_id)
80
+ results = client.recommend(
81
+ collection_name=collection_name, positive=[positive_id], limit=limit
82
+ )
83
  return format_results(results)
84
 
85
 
86
+ def query(search_term, search_type, limit: Optional[int] = 10):
87
  if search_type == "Recommend similar datasets":
88
+ return recommend(search_term, limit)
89
  else:
90
+ return search(search_term, limit)
91
 
92
 
93
  with gr.Blocks() as demo:
 
103
  value="movie review sentiment",
104
  label="hub id i.e. IMDB or query i.e. movie review sentiment",
105
  )
106
+
107
  with gr.Row():
108
  with gr.Row():
109
  find_similar_btn = gr.Button("Search")
 
113
  value="Semantic Search",
114
  interactive=True,
115
  )
116
+ with gr.Column():
117
+ max_results = gr.Slider(
118
+ minimum=1,
119
+ maximum=50,
120
+ step=1,
121
+ value=10,
122
+ label="Maximum number of results",
123
+ help="This is the maximum number of results that will be returned",
124
+ )
125
  results = gr.Markdown()
126
+ find_similar_btn.click(query, [search_term, search_type, max_results], results)
127
 
128
 
129
  demo.launch()