liquidcarbon commited on
Commit
0af2a8e
1 Parent(s): 5a864fe
Files changed (4) hide show
  1. Dockerfile +2 -1
  2. history.json +3 -0
  3. main.py +126 -69
  4. sql.py +161 -14
Dockerfile CHANGED
@@ -25,9 +25,10 @@ RUN $PUP
25
 
26
  RUN pup py3.11
27
 
28
- RUN pup fetch appenv duckdb gradio itables
29
 
30
  COPY --chown=user *.py .
 
31
 
32
  EXPOSE 7860
33
  CMD ["appenv/.venv/bin/uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
 
25
 
26
  RUN pup py3.11
27
 
28
+ RUN pup fetch appenv duckdb gradio itables python-ulid
29
 
30
  COPY --chown=user *.py .
31
+ COPY --chown=user *.json .
32
 
33
  EXPOSE 7860
34
  CMD ["appenv/.venv/bin/uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
history.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {"id": "01HWDNFA8QB96PV3RQCEPSG3C2", "q": "SELECT 42 AS answer", "alias": "example1", "template": "SELECT {x} AS {colname}", "definitions": "# Define variables: one '=' per line\nx=42\ncolname='answer'", "kwargs": {"x": 42, "colname": "answer"}, "rows": 1, "cols": 1, "source_id": null, "start": "2024-04-26 16:58:09.479", "end": "2024-04-26 16:58:09.479"}
2
+ {"id": "01HWDNFA8QTEA1Y1V229501RMF", "q": "SELECT\n Symbol,\n Number,\n Mass,\n Abundance\nFROM 'https://raw.githubusercontent.com/ekwan/cctk/master/cctk/data/isotopes.csv'", "alias": "example2", "template": "SELECT\n Symbol,\n Number,\n Mass,\n Abundance\nFROM '{url}'", "definitions": "url = https://raw.githubusercontent.com/ekwan/cctk/master/cctk/data/isotopes.csv", "kwargs": {"url": "https://raw.githubusercontent.com/ekwan/cctk/master/cctk/data/isotopes.csv"}, "rows": 354, "cols": 4, "source_id": null, "start": "2024-04-26 16:58:13.425", "end": "2024-04-26 16:58:13.725"}
3
+ {"id": "01HWE3CN3BPK6S4XT9B7H3GMSA", "q": "SELECT *\nFROM 'history.json'\nORDER BY id DESC", "alias": "example3", "template": "SELECT *\nFROM 'history.json'\nORDER BY id DESC", "kwargs": {}, "definitions": "", "rows": 2, "cols": 11, "source_id": null, "start": "2024-04-26 21:01:13.707", "end": "2024-04-26 21:01:13.709"}
main.py CHANGED
@@ -10,101 +10,158 @@ import gradio as gr
10
  import pandas as pd
11
  from fastapi import FastAPI
12
  from fastapi.responses import HTMLResponse, RedirectResponse
13
- from itables import options as itoptions, to_html_datatable
14
-
15
  from sql import Q
16
 
 
17
  itoptions.classes = "display compact cell-border"
18
  itoptions.column_filters = "footer"
 
 
 
 
 
 
 
 
 
19
 
20
- HEAD = """
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  """
22
- # <link rel="stylesheet" href="https://cdn.datatables.net/2.0.5/css/dataTables.dataTables.min.css" />
23
-
24
- EXAMPLE1 = """
25
- SELECT
26
- Symbol,
27
- Number,
28
- Mass,
29
- Abundance
30
- FROM 'https://raw.githubusercontent.com/ekwan/cctk/master/cctk/data/isotopes.csv'
31
  """
32
 
33
- EXAMPLE2 = """
34
- SELECT
35
- 42 AS answer,
36
- 'Life, Universe & Everything' AS question
 
 
 
 
 
 
 
37
  """
38
 
 
39
  app = FastAPI()
 
 
 
 
 
40
 
41
- @app.get("/q/{base64query}", response_class=HTMLResponse)
42
- def query_db(base64query: str|None = None):
43
- """Endpoint for running b64-encoded SQL queries."""
44
- decoded = Q.from_base64(base64query)
45
- df = decoded.df()
46
- html = to_html_datatable(df, buttons=["copyHtml5"])
47
- return f"""
48
- <head>
49
- {HEAD}
50
- </head>
51
- <div style="width:95vh;">{html}</div>
52
- <hr><pre>{decoded}</pre>
53
- """
54
 
55
- def query_from_request(query, request: gr.Request):
56
- """Process query from input block or from initial request.
 
 
 
 
 
 
 
 
 
 
57
 
58
- https://github.com/gradio-app/gradio/issues/7464#issuecomment-1960161591
59
- """
60
- if not query:
61
- query_params = request.query_params
62
- base64query = dict(query_params).get("q")
63
- else:
64
- base64query = Q(query).base64
65
- if base64query in (None, "example"):
66
- decoded = Q(EXAMPLE2)
67
- base64query = decoded.base64
68
  else:
69
- decoded = Q.from_base64(base64query)
 
 
 
 
 
70
 
71
- _host = request.headers.get("Host")
72
- if "huggingface.co/spaces" in _host:
73
- # modify URL to access endpoints that aren't available on default app landing page
74
- split_url = _host.rsplit("/", maxsplit=2)
75
- hf_user, hf_space = split_url[1], split_url[2]
76
- host = f"https://{hf_user}-{hf_space}.hf.space"
77
- else:
78
- host = _host
79
- editor_url = f"{host}/sql/?q={base64query}"
80
- query_url = f"{host}/q/{base64query}"
81
  result = f"""
82
  <div id="resultContainer">
83
- <iframe src="/q/{base64query}" width="90%" height="90%"></iframe>
84
- </div>"""
85
- return (decoded, editor_url, query_url, result)
 
 
 
 
 
 
 
 
 
 
86
 
87
  with gr.Blocks(
88
  title="Gradio DuckDB Editor",
89
- css="#resultContainer {height: 75vh;}"
 
90
  ) as gradio_sql_interface:
91
  with gr.Row():
92
- with gr.Column(scale=1, min_width=420):
93
- header = gr.Markdown("# SQL Editor")
94
- sql_code = gr.Code(language="sql", label="SQL Query", lines=32, interactive=True)
95
- button = gr.Button("run")
 
 
 
 
 
96
  editor_url = gr.Code(label="Share Editor URL", lines=1)
97
- query_url = gr.Code(label="Share Query Results URL", lines=1)
98
- with gr.Column(scale=1, min_width=540):
99
- markdown = gr.Markdown("# RESULTS")
100
- results = gr.HTML()
101
- button.click(query_from_request, [sql_code], [sql_code, editor_url, query_url, results])
102
- gradio_sql_interface.load(query_from_request, [sql_code], [sql_code, editor_url, query_url, results])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
 
104
  app = gr.mount_gradio_app(app, gradio_sql_interface, path="/sql")
105
 
106
  @app.get("/")
107
- @app.get("/sql")
108
- @app.get("/sql/")
109
  def redirect_to_example():
110
- return RedirectResponse("/sql/?q=example")
 
10
  import pandas as pd
11
  from fastapi import FastAPI
12
  from fastapi.responses import HTMLResponse, RedirectResponse
13
+ from fastapi.middleware.cors import CORSMiddleware
 
14
  from sql import Q
15
 
16
+ from itables import options as itoptions, to_html_datatable
17
# Global itables rendering options applied to every table this app produces.
itoptions.classes = "display compact cell-border"
itoptions.column_filters = "footer"

# DataTables "layout" mapping: slot name -> control shown in that slot.
# NOTE(review): slots set to None presumably switch off the DataTables
# default control for that position — confirm against the DataTables docs.
itoptions.layout = dict(
    top={"buttons": ["copyHtml5", "csvHtml5", "pageLength"]},
    top2="search",
    top3="info",
    topStart=None,
    topEnd=None,
    bottom="paging",
    bottomStart=None,
    bottomEnd=None,
)


# Stylesheet handed to gr.Blocks(css=...): sizes the results container and
# dresses it with the theme's block variables.
CSS = """
#resultContainer {
height: calc(90vh - 40px);
box-shadow: var(--block-shadow) !important;
border-width: var(--block-border-width) !important;
border-color: var(--block-border-color) !important;
border-radius: var(--block-radius) !important;
background: var(--block-background-fill) !important;
}
"""
44
+
45
+ HEAD = """
 
 
 
 
 
 
 
46
  """
47
 
48
# Standalone HTML page for one query result.
# HEAD is interpolated immediately by the f-string; the doubled braces survive
# as {datatable}, {q} and {ran} placeholders that run_query() fills in later
# with str.format().
# NOTE(review): because the finished string goes through str.format(), any
# literal "{" or "}" inside HEAD would be treated as a placeholder — confirm
# HEAD contains none.
RESULT_TEMPLATE = f"""
<!doctype html>
<html>
<head>{HEAD}</head>
<body>
<div style="">{{datatable}}</div>
<hr><h3>Formatted query:</h3>
<pre>{{q}}</pre>
<hr>ran at {{ran}}
</body>
</html>
"""
60
 
61
+
62
# FastAPI application; the Gradio editor is mounted onto it further down.
app = FastAPI()

# CORS policy: any origin may issue GET and POST requests.
_cors_settings = {
    "allow_origins": ["*"],
    "allow_methods": ["GET", "POST"],
}
app.add_middleware(CORSMiddleware, **_cors_settings)
68
 
69
@app.get("/q")
def retrieve_query(query_id: str|None = None, alias: str|None = None):
    """Endpoint for retrieving and running saved SQL queries.

    Looks the query up in the history by ``query_id`` or ``alias``, runs it
    and returns the rendered result page.

    Returns:
        HTMLResponse with the result page, or a 404 HTMLResponse when no
        saved query matches the given id/alias.
    """
    # Local imports: the module header only imports Q from sql.
    from html import escape
    from sql import NotFoundError

    try:
        q = Q.from_history(query_id=query_id, alias=alias)
    except NotFoundError as e:
        # The message echoes request parameters, so escape it before
        # embedding it in HTML.
        return HTMLResponse(content=f"<pre>{escape(str(e))}</pre>", status_code=404)
    return HTMLResponse(content=run_query(q))
 
 
 
 
 
 
 
 
74
 
75
def run_query(q: Q, save=True, html_template=RESULT_TEMPLATE):
    """Run a query and render its result as a complete HTML page.

    Args:
        q: the query to execute.
        save: forwarded to ``q.df``; when true the run is appended to history.
        html_template: ``str.format`` template with ``datatable``, ``q`` and
            ``ran`` placeholders.

    Returns:
        The formatted HTML string. Execution errors are not raised; they are
        shown as a one-cell "error" table instead.
    """
    from html import escape  # local: `html` is not imported at module level

    try:
        df = q.df(save=save, _raise=True)
    except Exception as e:
        # Page boundary: show the failure to the user rather than a 500.
        df = pd.DataFrame({"error": [str(e)]})
    if df is None:
        # q.df() returns None when there is nothing to tabulate (e.g. the
        # query matched no rows); to_html_datatable needs a real DataFrame.
        df = pd.DataFrame({"result": ["query returned no rows"]})
    result_datatable = to_html_datatable(df)
    return html_template.format(
        datatable=result_datatable,
        # SQL routinely contains "<", ">" and "&"; escape it for the <pre>.
        q=escape(q),
        # Explicit codes instead of the glibc-only "%F %T" shorthands.
        ran=q.end.datetime.strftime("%Y-%m-%d %H:%M:%S"),
    )
87
 
88
def query_from_request(sql_input, definitions, request: gr.Request):
    """Process query from input block or from initial request.

    When both editor fields are empty (page load), the query is looked up in
    the history using the ``q`` (id) and ``alias`` URL parameters. Otherwise
    a new query is built from the editor contents and saved before it runs.

    Returns:
        Tuple ``(sql_input, definitions, result, editor_url, result_url)``
        matching the Gradio outputs: template text, definitions text, the
        iframe HTML showing the result, and the two shareable URLs.
    """
    from urllib.parse import urlencode

    host = change_hf_host(request)
    if not sql_input and not definitions:
        # landing page or saved query
        url_query_params = dict(request.query_params)
        query_id = url_query_params.get("q")
        alias = url_query_params.get("alias")
        q = Q.from_history(query_id=query_id, alias=alias)
        iframe_src = f"/q?query_id={q.source_id}"
    else:
        # new query - run button was pressed, register unexecuted query
        q = Q.from_template_and_definitions(sql_input, definitions)
        q.save()
        query_id = q.id
        alias = q.alias
        iframe_src = f"/q?query_id={query_id}"

    result = f"""
<div id="resultContainer">
<iframe src="{iframe_src}" width="99%" height="99%"></iframe>
</div>
"""

    # change_hf_host() may already return a full "https://..." origin; only
    # prepend a scheme when there is none. With no host at all, fall back to
    # relative URLs.
    if not host:
        base_url = ""
    elif host.startswith(("http://", "https://")):
        base_url = host
    else:
        base_url = f"http://{host}"

    # Only include parameters that are set, and URL-encode their values.
    link_params = {k: v for k, v in (("q", query_id), ("alias", alias)) if v}
    editor_url = f"{base_url}/sql?{urlencode(link_params)}"
    result_url = f"{base_url}{iframe_src}"
    return (q.template, f"{q.definitions}", result, editor_url, result_url)
120
+
121
 
122
# Gradio UI: editor controls in the left column, query results on the right.
with gr.Blocks(
    title="Gradio DuckDB Editor",
    theme=gr.themes.Soft(),
    css=CSS,
) as gradio_sql_interface:
    with gr.Row():
        with gr.Column(scale=2, min_width=450):
            caption1 = gr.Markdown("# SQL Editor\nClick buttons below to see examples")
            # Each example button is a plain link that reloads the editor
            # with a saved alias.
            with gr.Row():
                ex1_button = gr.Button("Variables", link="/sql?alias=example1")
                ex2_button = gr.Button("URL", link="/sql?alias=example2")
                ex3_button = gr.Button("Local File", link="/sql?alias=example3")
            definitions = gr.Code(label="Definitions", lines=2, interactive=True)
            sql_input = gr.Code(label="SQL Query", language="sql", lines=25, interactive=True)
            run_button = gr.Button("run", variant="primary")
            editor_url = gr.Code(label="Share Editor URL", lines=1)
            result_url = gr.Code(label="Share Query Results URL", lines=1)
        with gr.Column(scale=3, min_width=600):
            caption2 = gr.Markdown("# RESULTS")
            result = gr.HTML(elem_classes="block")

    # The same handler wiring serves both the run button and the page load.
    magic = {
        "fn": query_from_request,
        "inputs": [sql_input, definitions],
        "outputs": [sql_input, definitions, result, editor_url, result_url],
    }
    run_button.click(**magic)
    gradio_sql_interface.load(**magic)
150
+
151
def change_hf_host(request: gr.Request):
    """Return the host to use when building shareable URLs.

    Reads the request's ``Host`` header. When it contains
    ``huggingface.co/spaces`` the last two path segments are taken as user
    and space name and rewritten to ``https://{user}-{space}.hf.space``, so
    that endpoints hidden from the default HF app landing page are reachable.
    Otherwise the header value is returned unchanged; a missing header
    yields an empty string.
    """
    # NOTE(review): a Host header normally carries no path, so the rewrite
    # branch may never trigger in practice — confirm what HF actually sends.
    _host = (request.headers.get("Host") or "").rstrip("/")
    if "huggingface.co/spaces" not in _host:
        return _host
    split_url = _host.rsplit("/", maxsplit=2)
    hf_user, hf_space = split_url[1], split_url[2]
    return f"https://{hf_user}-{hf_space}.hf.space"
161
+
162
 
163
# Serve the Gradio editor under /sql on the FastAPI app.
app = gr.mount_gradio_app(app, gradio_sql_interface, path="/sql")


@app.get("/")
def redirect_to_example():
    """Redirect the bare root URL to the editor preloaded with example1."""
    landing_url = "/sql?alias=example1"
    return RedirectResponse(landing_url)
sql.py CHANGED
@@ -1,30 +1,130 @@
 
1
  import base64
2
  import duckdb
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  class Q(str):
4
  UNSAFE = ["CREATE", "DELETE", "DROP", "INSERT", "UPDATE"]
5
-
6
- def __init__(self, query: str):
7
- self.is_safe = not any([cmd in query.upper() for cmd in self.UNSAFE])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
- def run(self, sql_engine=None):
 
 
 
 
 
 
 
 
 
10
  try:
11
  if sql_engine is None:
12
- return self.run_duckdb()
13
  else:
14
- return self.run_sql(sql_engine)
 
 
15
  except Exception as e:
16
- pass
17
-
 
 
 
 
 
 
18
  def run_duckdb(self):
19
  return duckdb.sql(self)
20
 
21
- def df(self, sql_engine=None):
22
- result = self.run(sql_engine=sql_engine)
23
- if result is None: return
24
- result_df = result.df()
25
- return result_df
 
 
 
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  @property
29
  def base64(self):
30
  return base64.b64encode(self.encode()).decode()
@@ -32,4 +132,51 @@ class Q(str):
32
  @classmethod
33
  def from_base64(cls, b64):
34
  """Initializing from base64-encoded URL paths."""
35
- return cls(base64.b64decode(b64).decode())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import ast
2
  import base64
3
  import duckdb
4
+ import json
5
+ import re
6
+ import textwrap
7
+ from ulid import ULID
8
+
9
# JSON-lines file that saved queries are appended to and looked up from.
HISTORY_FILE = "history.json"


class SQLError(Exception):
    """Raised when a history lookup query does not produce a usable result."""


class NotFoundError(Exception):
    """Raised when no saved query matches the requested id or alias."""
16
+
17
class Q(str):
    """SQL query string that carries its template, variables and run metadata.

    The string value is the template with ``kwargs`` substituted via
    ``str.format``. Instance attributes set on creation:

    - ``id``: fresh ULID string identifying this query object
    - ``alias``: optional human-friendly name (taken from the ``alias`` kwarg)
    - ``template``: the dedented, stripped template text
    - ``kwargs``: the template variables
    - ``definitions``: ``key = repr(value)`` lines rebuilt from ``kwargs``
    - ``rows``, ``cols``, ``source_id``, ``start``, ``end``: None until set by
      ``run`` / ``from_history``
    """

    UNSAFE = ["CREATE", "DELETE", "DROP", "INSERT", "UPDATE"]
    rows = None

    def __new__(cls, template: str, **kwargs):
        """Create a new Q-string.

        Args:
            template: SQL text, optionally containing ``{name}`` placeholders.
            **kwargs: placeholder values; ``alias`` is also stored as the
                query's alias and removed from the variables.
        """
        _template = textwrap.dedent(template).strip()
        try:
            instance = str.__new__(cls, _template.format(**kwargs))
        except (KeyError, IndexError, ValueError):
            # Unresolved placeholders or stray braces (common in SQL/JSON
            # literals): fall back to the raw template text.
            instance = str.__new__(cls, _template)
        instance.id = str(ULID())
        # Always drop "alias" from the variables, even when it is None,
        # so it never leaks into definitions or the saved kwargs.
        instance.alias = kwargs.pop("alias", None) or None
        instance.template = _template
        instance.kwargs = kwargs
        instance.definitions = "\n".join(f"{k} = {v!r}" for k, v in kwargs.items())

        for attr in ("rows", "cols", "source_id", "start", "end"):
            setattr(instance, attr, None)
        return instance

    def __repr__(self):
        """Neat repr for inspecting Q objects: one ``name: value`` per attribute."""
        strings = []
        for k, v in self.__dict__.items():
            text = str(v)
            # Multi-line values go on their own indented lines; indent the
            # string form so non-str values (dicts, ULIDs) cannot raise.
            value_repr = "\n" + textwrap.indent(text, " ") if "\n" in text else v
            strings.append(f"{k}: {value_repr}")
        return "\n".join(strings)

    def run(self, sql_engine=None, save=False, _raise=False):
        """Execute the query and record timing and result shape.

        Args:
            sql_engine: None runs through DuckDB; anything else is passed to
                ``self.run_sql`` (NOTE(review): ``run_sql`` is not defined in
                the visible part of this class — confirm it exists).
            save: append this query to the history file after the run,
                whether it succeeded or not.
            _raise: re-raise execution errors instead of returning them.

        Returns:
            The engine's result object on success, or the error message as a
            string when the run failed and ``_raise`` is false.
        """
        self.start = ULID()
        # Reset so a failed re-run cannot report the shape of an earlier run.
        self.rows = self.cols = None
        try:
            if sql_engine is None:
                res = self.run_duckdb()
            else:
                res = self.run_sql(sql_engine)
            self.rows, self.cols = res.shape
            return res
        except Exception as e:
            if _raise:
                raise
            return str(e)
        finally:
            self.end = ULID()
            if save:
                self.save()

    def run_duckdb(self):
        """Run the query text through the default DuckDB connection."""
        return duckdb.sql(self)

    def df(self, sql_engine=None, save=False, _raise=False):
        """Run the query and return the result as a DataFrame.

        Returns:
            The result DataFrame with the query attached as ``.q``, or None
            when the run did not produce a tabular result. A successful query
            with zero rows returns an empty DataFrame.
        """
        res = self.run(sql_engine=sql_engine, save=save, _raise=_raise)
        if self.rows is None:
            return None
        result_df = res.df()
        result_df.q = self
        return result_df

    def save(self, file=HISTORY_FILE):
        """Append this query as one JSON line to ``file``."""
        with open(file, "a", encoding="utf-8") as f:
            f.write(self.json + "\n")

    @staticmethod
    def _json_default(value):
        """Serialize values json cannot handle natively.

        Objects exposing ``.datetime`` (the ULID start/end stamps) become
        millisecond timestamps; anything else falls back to ``repr`` so that
        saving history never fails on an exotic variable value.
        """
        stamp = getattr(value, "datetime", None)
        if stamp is not None:
            # Explicit codes instead of the glibc-only "%F %T" shorthands;
            # [:-3] trims microseconds down to milliseconds.
            return stamp.strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
        return repr(value)

    @property
    def json(self):
        """JSON document with id, resolved query text and all attributes."""
        serialized = {"id": self.id, "q": str(self)}
        serialized.update(self.__dict__)
        return json.dumps(serialized, default=self._json_default)

    @property
    def is_safe(self):
        """True when no UNSAFE keyword appears as a whole word.

        Both the template and the resolved query text are checked, so a
        keyword cannot be smuggled in through a variable, and identifiers
        such as ``updated_at`` are not flagged.
        """
        keywords = "|".join(re.escape(cmd) for cmd in self.UNSAFE)
        pattern = re.compile(rf"\b(?:{keywords})\b", flags=re.IGNORECASE)
        return not any(pattern.search(text) for text in (self.template, str(self)))

    @classmethod
    def from_dict(cls, query_dict: dict):
        """Build a query from a dict holding ``q`` (template) plus variables."""
        # Work on a copy so the caller's dict is left untouched.
        params = dict(query_dict)
        q = params.pop("q")
        return cls(q, **params)

    @classmethod
    def from_template_and_definitions(cls, template: str, definitions: str, alias: str|None = None):
        """Build a query from editor input.

        ``definitions`` is parsed into variables with ``parse_definitions``;
        the original definitions text is kept verbatim on the instance.
        """
        query_dict = {"q": template, "alias": alias}
        query_dict.update(parse_definitions(definitions))
        instance = cls.from_dict(query_dict)
        instance.definitions = definitions
        return instance

    @classmethod
    def from_history(cls, query_id=None, alias=None):
        """Load a saved query from the history file by id or alias.

        Raises:
            NotFoundError: no saved query matches.
            SQLError: the lookup itself failed (e.g. unreadable history file).
        """
        # id/alias arrive from URL parameters, so they are bound as prepared
        # statement parameters instead of being spliced into the SQL text.
        search_sql = (
            "SELECT id, template, kwargs "
            f"FROM '{HISTORY_FILE}' "
            "WHERE id = ? OR alias = ? "
            "LIMIT 1"
        )
        try:
            found = duckdb.execute(search_sql, [query_id, alias]).fetchall()
        except Exception as e:
            raise SQLError(str(e)) from e
        if not found:
            raise NotFoundError(f"id '{query_id}' / alias '{alias}' not found")
        source_id, template, kwargs = found[0]
        # Drop None entries before rebuilding the query.
        # NOTE(review): presumably these are struct keys that belong to
        # other rows of the history file — confirm.
        kwargs = {k: v for k, v in (kwargs or {}).items() if v is not None}
        instance = cls(template, **kwargs)
        instance.source_id = source_id
        return instance

    @property
    def base64(self):
        """The query text, base64-encoded."""
        return base64.b64encode(self.encode()).decode()

    @classmethod
    def from_base64(cls, b64):
        """Initializing from base64-encoded URL paths."""
        return cls(base64.b64decode(b64).decode())
136
+
137
+
138
def parse_definitions(definitions) -> dict:
    """Parse a string of "key=value" pairs, one per line, into kwargs.

    Blank lines, lines starting with ``#`` and lines without ``=`` are
    skipped. Each value is evaluated as a Python literal when possible
    (``42`` -> int, ``'answer'`` -> str); anything that is not a valid
    literal, such as an unquoted URL, is kept as the raw string.

    Args:
        definitions: the definitions text; None or "" yields an empty dict.

    Returns:
        Mapping of variable name to parsed value.
    """
    kwargs = {}
    for raw_line in (definitions or "").splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue
        # Split on the first "=" only, so values may themselves contain "=".
        key, separator, value = line.partition("=")
        if not separator:
            continue
        # Whitespace is removed from the key only; the value keeps its inner
        # spaces so quoted strings survive intact.
        key = re.sub(r"\s+", "", key)
        value = value.strip()
        if not key:
            continue
        try:
            kwargs[key] = ast.literal_eval(value)
        except (ValueError, SyntaxError):
            kwargs[key] = value
    return kwargs
150
+
151
+
152
# Built-in example queries; their aliases match the editor's example buttons.

# Example 1: template variables supplied through a definitions block.
EX1 = Q.from_template_and_definitions(
    template="SELECT {x} AS {colname}",
    definitions="\n".join([
        "# Define variables: one '=' per line",
        "x=42",
        "colname='answer'",
    ]),
    alias="example1",
)

# Example 2: querying a remote CSV file by URL.
EX2 = Q(
    """
    SELECT
        Symbol,
        Number,
        Mass,
        Abundance
    FROM '{url}'
    """,
    url="https://raw.githubusercontent.com/ekwan/cctk/master/cctk/data/isotopes.csv",
    alias="example2",
)

# Example 3: querying the local history file itself.
# "SELECT *" restores the projection ("SELECT /" is not valid SQL).
EX3 = Q(
    """
    SELECT *
    FROM 'history.json'
    ORDER BY id DESC
    """,
    alias="example3",
)