davidberenstein1957 HF staff committed on
Commit
f6aec2d
·
1 Parent(s): a8ab183

feat: add basic app for query composer

Browse files
Files changed (5) hide show
  1. .gitignore +1 -0
  2. README.md +2 -2
  3. app.py +116 -0
  4. demo.py +13 -0
  5. requirements.txt +2 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .env
README.md CHANGED
@@ -1,12 +1,12 @@
1
  ---
2
  title: Text To Sql Hub Datasets
3
- emoji: 🏆
4
  colorFrom: blue
5
  colorTo: blue
6
  sdk: gradio
7
  sdk_version: 4.44.0
8
  app_file: app.py
9
- pinned: false
10
  license: apache-2.0
11
  ---
12
 
 
1
  ---
2
  title: Text To Sql Hub Datasets
3
+ emoji: 🥐 🦙 🤗
4
  colorFrom: blue
5
  colorTo: blue
6
  sdk: gradio
7
  sdk_version: 4.44.0
8
  app_file: app.py
9
+ pinned: true
10
  license: apache-2.0
11
  ---
12
 
app.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import urllib.parse
4
+
5
+ import gradio as gr
6
+ import requests
7
+ from gradio_huggingfacehub_search import HuggingfaceHubSearch
8
+ from huggingface_hub import InferenceClient
9
+
10
# Example value of the search component. Not referenced elsewhere in the
# visible code; kept so module-level behaviour is unchanged.
example = HuggingfaceHubSearch().example_value()

# Inference client used to turn natural-language questions into SQL.
# Reading the token with .get() avoids a KeyError at import time when
# HF_TOKEN is not exported; InferenceClient then falls back to the locally
# saved Hugging Face token, if any.
client = InferenceClient(
    "meta-llama/Meta-Llama-3.1-70B-Instruct",
    token=os.environ.get("HF_TOKEN"),
)
16
+
17
+
18
def get_iframe(hub_repo_id, sql_query=None):
    """Build the HTML ``<iframe>`` snippet that embeds the Hub dataset viewer.

    Args:
        hub_repo_id: Dataset repository ID on the Hugging Face Hub, for
            example ``"user/dataset"``.
        sql_query: Optional SQL text. When truthy, the viewer URL enables the
            SQL console and pre-fills it with this (percent-encoded) query.

    Returns:
        An HTML string containing a single iframe, or an empty string when
        ``hub_repo_id`` is empty.
    """
    if not hub_repo_id:
        # Nothing selected yet: do not embed a malformed viewer URL.
        return ""

    # Percent-encode the ID (keeping "/" between owner and name) so that
    # quotes or angle brackets cannot break out of the src="..." attribute.
    repo_path = urllib.parse.quote(hub_repo_id, safe="/")
    url = f"https://huggingface.co/datasets/{repo_path}/embed/viewer"
    if sql_query:
        encoded_sql = urllib.parse.quote(sql_query)
        url = f"{url}?sql_console=true&sql={encoded_sql}"

    return f"""
<iframe
  src="{url}"
  frameborder="0"
  width="100%"
  height="560px"
></iframe>
"""
33
+
34
+
35
def get_column_info(hub_repo_id):
    """Fetch a dataset's description and features from the datasets server.

    Queries ``https://datasets-server.huggingface.co/info`` and reads the
    first entry of the returned ``dataset_info`` mapping.

    Args:
        hub_repo_id: Dataset repository ID on the Hugging Face Hub.

    Returns:
        A ``(description, features)`` tuple where ``description`` is a string
        (empty when the server reports none) and ``features`` is the JSON
        text of that entry's ``features`` value.

    Raises:
        gr.Error: If no dataset is given, the request fails, or the response
            carries no ``dataset_info``.
    """
    if not hub_repo_id:
        raise gr.Error("Please select a dataset first.")

    try:
        # `params` takes care of URL-encoding the repo ID; the timeout keeps
        # the UI from hanging forever on a stalled connection.
        response = requests.get(
            "https://datasets-server.huggingface.co/info",
            params={"dataset": hub_repo_id},
            timeout=30,
        )
        response.raise_for_status()
        payload = response.json()
    except (requests.RequestException, ValueError) as err:
        raise gr.Error(
            f"Could not fetch dataset info for '{hub_repo_id}': {err}"
        ) from err

    dataset_info = payload.get("dataset_info")
    if not dataset_info:
        raise gr.Error(f"No dataset info available for '{hub_repo_id}'.")

    # Use the first entry of the mapping, as the original code did.
    first_entry = next(iter(dataset_info.values()))
    description = first_entry.get("description") or ""
    features = json.dumps(first_entry.get("features"))
    return description, features
44
+
45
+
46
def _strip_code_fence(text):
    """Return *text* stripped of whitespace and of a surrounding ``` fence."""
    cleaned = text.strip()
    if len(cleaned) < 6 or not (cleaned.startswith("```") and cleaned.endswith("```")):
        return cleaned
    inner = cleaned[3:-3]
    first_line, newline, remainder = inner.partition("\n")
    # The opening fence may be followed by nothing or by a language tag.
    if newline and first_line.strip().lower() in ("", "sql", "duckdb"):
        inner = remainder
    return inner.strip()


def query_dataset(hub_repo_id, description, features, query):
    """Ask the chat model for a DuckDB SQL query and embed it in the viewer.

    Args:
        hub_repo_id: Dataset repository ID passed on to ``get_iframe``.
        description: Dataset description inserted into the prompt.
        features: Dataset features (JSON text) inserted into the prompt.
        query: The user's natural-language request.

    Returns:
        A ``(sql, iframe_html)`` tuple: the SQL text produced by the model and
        the result of ``get_iframe(hub_repo_id, sql)``.
    """
    messages = [
        {
            "role": "system",
            "content": "You are a helpful assistant that returns a DuckDB SQL query based on the user's query and dataset features. Only return the SQL query, no other text.",
        },
        {
            "role": "user",
            "content": f"""# Description
{description}

# Features
{features}

# Query
{query}
""",
        },
    ]
    response = client.chat_completion(
        messages=messages,
        max_tokens=1000,
        stream=False,
    )
    # Chat models frequently wrap code in Markdown fences despite the system
    # prompt; strip them so only SQL reaches the viewer's SQL console.
    sql = _strip_code_fence(response.choices[0].message.content or "")
    return sql, get_iframe(hub_repo_id, sql)
72
+
73
+
74
# UI layout: dataset search, embedded viewer, dataset metadata, and the
# natural-language-to-SQL controls.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            search_in = HuggingfaceHubSearch(
                label="Search Huggingface Hub",
                # The search is restricted to datasets, so say so.
                placeholder="Search for datasets on Huggingface",
                search_type="dataset",
            )

    btn = gr.Button("Show Dataset")
    with gr.Row():
        search_out = gr.HTML(label="Search Results")
    with gr.Row():
        description = gr.Textbox(
            label="Description", placeholder="Description from dataset or project page"
        )
        features = gr.Code(label="Features", language="json")
    with gr.Row():
        query = gr.Textbox(label="Query")
    with gr.Row():
        sql_out = gr.Code(label="SQL Query")
    with gr.Row():
        btn2 = gr.Button("Query Dataset")

    # Show the dataset viewer first, then load description and features.
    gr.on(
        [btn.click, search_in.submit],
        fn=get_iframe,
        inputs=[search_in],
        outputs=[search_out],
    ).then(
        fn=get_column_info,
        inputs=[search_in],
        outputs=[description, features],
    )

    # query_dataset returns (sql, iframe_html); both values need an output
    # component, otherwise the iframe with the SQL console is never shown.
    btn2.click(
        fn=query_dataset,
        inputs=[search_in, description, features, query],
        outputs=[sql_out, search_out],
    )

if __name__ == "__main__":
    demo.launch()
demo.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+
3
+
4
def get_column_info(hub_repo_id):
    """Fetch a dataset's description and features from the datasets server.

    Args:
        hub_repo_id: Dataset repository ID on the Hugging Face Hub.

    Returns:
        A ``(description, features)`` tuple taken from the first entry of the
        response's ``dataset_info`` mapping.

    Raises:
        requests.RequestException: If the request fails or returns an HTTP
            error status.
        ValueError: If the response carries no ``dataset_info``.
    """
    response = requests.get(
        "https://datasets-server.huggingface.co/info",
        params={"dataset": hub_repo_id},
        timeout=30,
    )
    response.raise_for_status()
    dataset_info = response.json().get("dataset_info")
    if not dataset_info:
        raise ValueError(f"No dataset info available for '{hub_repo_id}'.")

    # Take the first entry instead of looking up the repo ID as a key, which
    # matches how app.py reads the same response.
    first_entry = next(iter(dataset_info.values()))
    description = first_entry.get("description")
    features = first_entry.get("features")
    return description, features


get_column_info("davidberenstein1957/test")
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ gradio-huggingfacehub-search
2
+ gradio