Commit
·
f6aec2d
1
Parent(s):
a8ab183
feat: add basic app for query composer
Browse files- .gitignore +1 -0
- README.md +2 -2
- app.py +116 -0
- demo.py +13 -0
- requirements.txt +2 -0
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
.env
|
README.md
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
---
|
2 |
title: Text To Sql Hub Datasets
|
3 |
-
emoji:
|
4 |
colorFrom: blue
|
5 |
colorTo: blue
|
6 |
sdk: gradio
|
7 |
sdk_version: 4.44.0
|
8 |
app_file: app.py
|
9 |
-
pinned:
|
10 |
license: apache-2.0
|
11 |
---
|
12 |
|
|
|
1 |
---
|
2 |
title: Text To Sql Hub Datasets
|
3 |
+
emoji: 🔥 🦆 🤗
|
4 |
colorFrom: blue
|
5 |
colorTo: blue
|
6 |
sdk: gradio
|
7 |
sdk_version: 4.44.0
|
8 |
app_file: app.py
|
9 |
+
pinned: true
|
10 |
license: apache-2.0
|
11 |
---
|
12 |
|
app.py
ADDED
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import os
|
3 |
+
import urllib.parse
|
4 |
+
|
5 |
+
import gradio as gr
|
6 |
+
import requests
|
7 |
+
from gradio_huggingfacehub_search import HuggingfaceHubSearch
|
8 |
+
from huggingface_hub import InferenceClient
|
9 |
+
|
10 |
+
# Example value exposed by the search component; it is assigned at import
# time and not referenced again in the visible part of this file.
example = HuggingfaceHubSearch().example_value()

# Shared inference client used by query_dataset() for chat completions.
# The token is read with os.environ[...], so importing this module raises
# KeyError when HF_TOKEN is not set.
client = InferenceClient(
    "meta-llama/Meta-Llama-3.1-70B-Instruct",
    token=os.environ["HF_TOKEN"],
)
|
16 |
+
|
17 |
+
|
18 |
+
def get_iframe(hub_repo_id, sql_query=None):
    """Build the HTML ``<iframe>`` snippet that embeds the Hub dataset viewer.

    Args:
        hub_repo_id: Dataset repository id such as ``"user/dataset"``.
        sql_query: Optional SQL text. When truthy, the viewer is opened with
            the SQL console enabled and this query pre-filled.

    Returns:
        An HTML string containing a single ``<iframe>`` element, or an empty
        string when ``hub_repo_id`` is empty/``None`` (nothing to embed).
    """
    # Local import: only this function needs it, which keeps the block
    # self-contained without touching the file-level imports.
    import html

    # Without a repo id the original produced ".../datasets/None/embed/viewer".
    if not hub_repo_id:
        return ""

    # Percent-encode the id ("/" stays as the path separator) so unexpected
    # characters cannot break out of the URL.
    repo_path = urllib.parse.quote(hub_repo_id)
    url = f"https://huggingface.co/datasets/{repo_path}/embed/viewer"
    if sql_query:
        encoded_sql = urllib.parse.quote(sql_query)
        url = f"{url}?sql_console=true&sql={encoded_sql}"

    # The URL lands inside a double-quoted HTML attribute, so escape it
    # ("&" becomes "&amp;", which the browser decodes back to "&").
    src = html.escape(url, quote=True)
    iframe = f"""
    <iframe
      src="{src}"
      frameborder="0"
      width="100%"
      height="560px"
    ></iframe>
    """
    return iframe
|
33 |
+
|
34 |
+
|
35 |
+
def get_column_info(hub_repo_id):
    """Fetch the description and features of a Hub dataset.

    Queries the datasets-server ``/info`` endpoint and reads the entry of the
    first key found under ``dataset_info``.

    Args:
        hub_repo_id: Dataset repository id such as ``"user/dataset"``.

    Returns:
        A ``(description, features)`` tuple where ``features`` is a JSON
        string. Both are empty strings when no repo id was given or the
        response carries no ``dataset_info`` entries.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    # Nothing selected in the UI yet: there is nothing to look up.
    if not hub_repo_id:
        return "", ""

    # Let requests build and encode the query string instead of splicing the
    # repo id into the URL by hand, and never wait forever on the network.
    response = requests.get(
        "https://datasets-server.huggingface.co/info",
        params={"dataset": hub_repo_id},
        timeout=30,
    )
    # Surface HTTP failures explicitly rather than tripping over the error
    # payload a few lines further down.
    response.raise_for_status()

    dataset_info = response.json().get("dataset_info") or {}
    if not dataset_info:
        return "", ""

    # Same choice as before: use the first entry of the mapping.
    config_info = next(iter(dataset_info.values()))
    description = config_info.get("description")
    features = json.dumps(config_info.get("features"))
    return description, features
|
44 |
+
|
45 |
+
|
46 |
+
def _strip_code_fence(text):
    """Return ``text`` without surrounding whitespace or a Markdown code fence."""
    # Local import keeps the helper self-contained.
    import re

    cleaned = (text or "").strip()
    # Matches an opening fence with optional language tag, the payload, and a
    # closing fence; anything else is returned unchanged (apart from strip()).
    fenced = re.fullmatch(r"```[\w-]*[ \t]*\n(.*?)\s*```", cleaned, flags=re.DOTALL)
    if fenced:
        cleaned = fenced.group(1).strip()
    return cleaned


def query_dataset(hub_repo_id, description, features, query):
    """Ask the chat model for a DuckDB SQL query and embed it in the viewer.

    Args:
        hub_repo_id: Dataset repository id passed on to ``get_iframe``.
        description: Dataset description inserted into the prompt.
        features: Dataset features (JSON text) inserted into the prompt.
        query: The user's natural-language request.

    Returns:
        A ``(sql, iframe_html)`` tuple: the SQL text produced by the model and
        the viewer iframe built by ``get_iframe`` for that SQL.
    """
    messages = [
        {
            "role": "system",
            "content": "You are a helpful assistant that returns a DuckDB SQL query based on the user's query and dataset features. Only return the SQL query, no other text.",
        },
        {
            "role": "user",
            "content": f"""# Description
{description}

# Features
{features}

# Query
{query}
""",
        },
    ]
    response = client.chat_completion(
        messages=messages,
        max_tokens=1000,
        stream=False,
    )
    # The prompt asks for bare SQL, but a reply may still arrive wrapped in a
    # ```sql fence or padded with whitespace (or be None); normalise it so
    # the SQL console receives only the statement itself.
    sql = _strip_code_fence(response.choices[0].message.content)
    return sql, get_iframe(hub_repo_id, sql)
|
72 |
+
|
73 |
+
|
74 |
+
# UI layout and event wiring.
# NOTE(review): the nesting of the layout contexts was reconstructed from a
# flattened listing; confirm the placement of `btn` against the real file.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            search_in = HuggingfaceHubSearch(
                label="Search Huggingface Hub",
                # search_type is "dataset", so the hint must say datasets.
                placeholder="Search for datasets on Huggingface",
                search_type="dataset",
            )

            btn = gr.Button("Show Dataset")
    with gr.Row():
        search_out = gr.HTML(label="Search Results")
    with gr.Row():
        description = gr.Textbox(
            label="Description", placeholder="Description from dataset or project page"
        )
        features = gr.Code(label="Features", language="json")
    with gr.Row():
        query = gr.Textbox(label="Query")
    with gr.Row():
        sql_out = gr.Code(label="SQL Query")
    with gr.Row():
        btn2 = gr.Button("Query Dataset")

    # Selecting a dataset first shows the viewer, then fills in the
    # description and features fields.
    gr.on(
        [btn.click, search_in.submit],
        fn=get_iframe,
        inputs=[search_in],
        outputs=[search_out],
    ).then(
        fn=get_column_info,
        inputs=[search_in],
        outputs=[description, features],
    )

    # query_dataset returns two values (SQL text, iframe HTML), so two output
    # components are required: the SQL box and the viewer, which is refreshed
    # with the SQL console pre-filled.
    btn2.click(
        fn=query_dataset,
        inputs=[search_in, description, features, query],
        outputs=[sql_out, search_out],
    )

if __name__ == "__main__":
    demo.launch()
|
demo.py
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import requests
|
2 |
+
|
3 |
+
|
4 |
+
def get_column_info(hub_repo_id):
    """Fetch the description and features of a Hub dataset.

    Args:
        hub_repo_id: Dataset repository id such as ``"user/dataset"``.

    Returns:
        A ``(description, features)`` tuple taken from the ``dataset_info``
        mapping of the datasets-server ``/info`` response. Either element is
        ``None`` when the response does not provide it.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    response = requests.get(
        "https://datasets-server.huggingface.co/info",
        params={"dataset": hub_repo_id},
        timeout=30,
    )
    response.raise_for_status()
    dataset_info = response.json().get("dataset_info") or {}

    # Keep the original lookup by repo id, but fall back to the first entry:
    # when the mapping is not keyed by repo id (presumably it is keyed by
    # config name -- confirm against the endpoint docs) the original lookup
    # returned None and the following .get() raised AttributeError.
    config_info = dataset_info.get(hub_repo_id)
    if config_info is None:
        config_info = next(iter(dataset_info.values()), {})

    return config_info.get("description"), config_info.get("features")


# Run the sample lookup only when executed as a script, so importing this
# module does not trigger a network request.
if __name__ == "__main__":
    print(get_column_info("davidberenstein1957/test"))
|
requirements.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
gradio-huggingfacehub-search
|
2 |
+
gradio
|