CalebCometML derek-thomas committed on
Commit
9adc040
·
0 Parent(s):

Duplicate from derek-thomas/kangas-demo

Browse files

Co-authored-by: Derek Thomas <derek-thomas@users.noreply.huggingface.co>

.gitattributes ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ datagrids/coco-500.datagrid filter=lfs diff=lfs merge=lfs -text
36
+ datagrids/beans.datagrid filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .idea
Dockerfile ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.9

# Install dependencies before copying the app so this layer stays cached
# until requirements.txt itself changes.
COPY requirements.txt /
RUN pip install --no-cache-dir -r /requirements.txt

WORKDIR /app

COPY ./ /app

# NOTE: a process backgrounded in a RUN step only exists while the image is
# being built; it is not running in the container. The Kangas server is
# therefore not started here -- app.py calls kg.launch() when it runs.

# Exec form so streamlit receives container signals directly.
CMD ["streamlit", "run", "app.py"]
README.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Kangas Demo
3
+ emoji: 📚
4
+ colorFrom: pink
5
+ colorTo: yellow
6
+ sdk: streamlit
7
+ sdk_version: 1.19.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: openrail
11
+ duplicated_from: derek-thomas/kangas-demo
12
+ ---
13
+
14
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from functools import partial
2
+ from pathlib import Path
3
+
4
+ import kangas as kg
5
+ import streamlit as st
6
+ import streamlit.components.v1 as components
7
+ from datasets import load_dataset
8
+
9
# Directory that contains this script; datagrids are saved beneath it.
proj_dir = Path(__file__).parent

# Host passed to Kangas and used to build the URL embedded in the iframe.
servername = 'localhost'
src = f"http://{servername}:4000"

# Page-level Streamlit setup, then the first instruction shown to the user.
st.set_page_config(layout="wide")
st.markdown("1. Select dataset of your choice")
17
+
18
+
19
def kangas_fn(dataset_repo):
    """Download a Hugging Face dataset and save it as a Kangas DataGrid file.

    Loads the ``train`` split of ``dataset_repo``, wraps it in a
    ``kg.DataGrid`` and saves it to ``<proj_dir>/datagrids/<name>.datagrid``.
    Each stage is wrapped in a Streamlit spinner.

    Args:
        dataset_repo: Dataset identifier passed to ``load_dataset``, e.g.
            ``'beans'`` or ``'user/name'``.
    """
    # Replace slashes so a namespaced repo id becomes a single file name
    # rather than a nested path. The '.datagrid' suffix is appended exactly
    # once so the result follows the same '<name>.datagrid' pattern as the
    # grids already tracked in the repository.
    dg_file_name = dataset_repo.replace('/', '__') + '.datagrid'
    with st.spinner("Loading Dataset..."):
        dataset = load_dataset(dataset_repo, split="train")
    with st.spinner("Creating Kangas..."):
        dg = kg.DataGrid(dataset)
    with st.spinner("Saving Kangas..."):
        dg.save(str(proj_dir / 'datagrids' / dg_file_name))
28
+
29
+
30
# Launch Kangas for the host that the iframe URL below points at.
kg.launch(host=servername)

# Sidebar controls for how the embedded Kangas UI is displayed.
height = st.sidebar.slider("iFrame Height", 200, 1500, 900, 100)
# Checked by default so the iframe scrolls unless the user turns it off.
scrolling = st.sidebar.checkbox("iFrame Scrolling", value=True)

hf_dataset = st.text_input("HuggingFace Dataset", value='beans')
# on_click calls kangas_fn with the dataset name currently in the text box.
st.button("Download and Run", on_click=partial(kangas_fn, hf_dataset))
st.markdown("""Click the dropdown in Kangas to see pre-loaded datasets""")
# Pass the checkbox value through instead of forcing scrolling on.
components.iframe(src, height=height, scrolling=scrolling)
bootstrap.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ import streamlit.web.bootstrap
3
+ from streamlit import config as _config
4
+
5
# app.py is expected to sit next to this bootstrap script.
proj_dir = Path(__file__).parent
filename = proj_dir / "app.py"

# Set the server.headless config option before the server starts.
_config.set_option("server.headless", True)
args = []

# flag_options is a mapping of config flag overrides; pass an empty dict
# (not an empty string) so code that iterates over it keeps working.
streamlit.web.bootstrap.run(str(filename), "", args, {})
datagrids/coco-500.datagrid ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff36d97d6935da95ac918b607465931a34e20ff232c80ad89faeecbc5923edcd
3
+ size 92835840
notebooks/.ipynb_checkpoints/explore-checkpoint.ipynb ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "from IPython.core.interactiveshell import InteractiveShell\n",
10
+ "InteractiveShell.ast_node_interactivity = \"all\""
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": null,
16
+ "metadata": {
17
+ "pycharm": {
18
+ "is_executing": true
19
+ }
20
+ },
21
+ "outputs": [
22
+ {
23
+ "data": {
24
+ "application/vnd.jupyter.widget-view+json": {
25
+ "model_id": "28635666517e4f4c9e2889c7a3bc10db",
26
+ "version_major": 2,
27
+ "version_minor": 0
28
+ },
29
+ "text/plain": [
30
+ "Downloading builder script: 0%| | 0.00/3.61k [00:00<?, ?B/s]"
31
+ ]
32
+ },
33
+ "metadata": {},
34
+ "output_type": "display_data"
35
+ },
36
+ {
37
+ "data": {
38
+ "application/vnd.jupyter.widget-view+json": {
39
+ "model_id": "0f9ccb664aa64f359bf15685ef3f5811",
40
+ "version_major": 2,
41
+ "version_minor": 0
42
+ },
43
+ "text/plain": [
44
+ "Downloading metadata: 0%| | 0.00/2.24k [00:00<?, ?B/s]"
45
+ ]
46
+ },
47
+ "metadata": {},
48
+ "output_type": "display_data"
49
+ },
50
+ {
51
+ "data": {
52
+ "application/vnd.jupyter.widget-view+json": {
53
+ "model_id": "efd478d8dcab433e8e8d802b4b23ec27",
54
+ "version_major": 2,
55
+ "version_minor": 0
56
+ },
57
+ "text/plain": [
58
+ "Downloading readme: 0%| | 0.00/4.75k [00:00<?, ?B/s]"
59
+ ]
60
+ },
61
+ "metadata": {},
62
+ "output_type": "display_data"
63
+ },
64
+ {
65
+ "name": "stdout",
66
+ "output_type": "stream",
67
+ "text": [
68
+ "Downloading and preparing dataset beans/default to /Users/derekthomas/.cache/huggingface/datasets/beans/default/0.0.0/90c755fb6db1c0ccdad02e897a37969dbf070bed3755d4391e269ff70642d791...\n"
69
+ ]
70
+ },
71
+ {
72
+ "data": {
73
+ "application/vnd.jupyter.widget-view+json": {
74
+ "model_id": "ec2ad1d1e6fe4be79d5b6226170250a3",
75
+ "version_major": 2,
76
+ "version_minor": 0
77
+ },
78
+ "text/plain": [
79
+ "Downloading data files: 0%| | 0/3 [00:00<?, ?it/s]"
80
+ ]
81
+ },
82
+ "metadata": {},
83
+ "output_type": "display_data"
84
+ },
85
+ {
86
+ "data": {
87
+ "application/vnd.jupyter.widget-view+json": {
88
+ "model_id": "4235b24184dc422ab70ef502e81c4b99",
89
+ "version_major": 2,
90
+ "version_minor": 0
91
+ },
92
+ "text/plain": [
93
+ "Downloading data: 0%| | 0.00/144M [00:00<?, ?B/s]"
94
+ ]
95
+ },
96
+ "metadata": {},
97
+ "output_type": "display_data"
98
+ }
99
+ ],
100
+ "source": [
101
+ "import kangas as kg\n",
102
+ "from datasets import load_dataset\n",
103
+ "\n",
104
+ "dataset = load_dataset(\"beans\", split=\"train\")\n",
105
+ "dg = kg.DataGrid(dataset)"
106
+ ]
107
+ },
108
+ {
109
+ "cell_type": "code",
110
+ "execution_count": null,
111
+ "metadata": {},
112
+ "outputs": [],
113
+ "source": []
114
+ }
115
+ ],
116
+ "metadata": {
117
+ "kernelspec": {
118
+ "display_name": "Python 3 (ipykernel)",
119
+ "language": "python",
120
+ "name": "python3"
121
+ },
122
+ "language_info": {
123
+ "codemirror_mode": {
124
+ "name": "ipython",
125
+ "version": 3
126
+ },
127
+ "file_extension": ".py",
128
+ "mimetype": "text/x-python",
129
+ "name": "python",
130
+ "nbconvert_exporter": "python",
131
+ "pygments_lexer": "ipython3",
132
+ "version": "3.9.16"
133
+ }
134
+ },
135
+ "nbformat": 4,
136
+ "nbformat_minor": 1
137
+ }
notebooks/explore.ipynb ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "from IPython.core.interactiveshell import InteractiveShell\n",
10
+ "InteractiveShell.ast_node_interactivity = \"all\""
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": 2,
16
+ "metadata": {},
17
+ "outputs": [],
18
+ "source": [
19
+ "from pathlib import Path\n",
20
+ "proj_dir = Path.cwd().parent\n",
21
+ "proj_dir"
22
+ ]
23
+ },
24
+ {
25
+ "cell_type": "code",
26
+ "execution_count": 3,
27
+ "metadata": {
28
+ "pycharm": {
29
+ "is_executing": true
30
+ }
31
+ },
32
+ "outputs": [],
33
+ "source": [
34
+ "import kangas as kg\n",
35
+ "from datasets import load_dataset\n",
36
+ "\n",
37
+ "# dataset = load_dataset(\"beans\", split=\"train\")\n",
38
+ "# dg = kg.DataGrid(dataset)"
39
+ ]
40
+ },
41
+ {
42
+ "cell_type": "code",
43
+ "execution_count": 4,
44
+ "metadata": {},
45
+ "outputs": [
46
+ {
47
+ "data": {
48
+ "text/html": [
49
+ "\n",
50
+ " <iframe\n",
51
+ " width=\"100%\"\n",
52
+ " height=\"750px\"\n",
53
+ " src=\"http://127.0.0.1:4000/\"\n",
54
+ " frameborder=\"0\"\n",
55
+ " allowfullscreen\n",
56
+ " \n",
57
+ " ></iframe>\n",
58
+ " "
59
+ ],
60
+ "text/plain": [
61
+ "<IPython.lib.display.IFrame at 0x16d9231c0>"
62
+ ]
63
+ },
64
+ "metadata": {},
65
+ "output_type": "display_data"
66
+ }
67
+ ],
68
+ "source": [
69
+ "from pathlib import Path\n",
70
+ "proj_dir = Path.cwd().parent\n",
71
+ "\n",
72
+ "import kangas as kg\n",
73
+ "from datasets import load_dataset\n",
74
+ "\n",
75
+ "dataset_repo = 'beans'\n",
76
+ "dataset = load_dataset(dataset_repo, split=\"train\")\n",
77
+ "dg = kg.DataGrid(dataset)\n",
78
+ "dg_file_name = dataset_repo.replace('/', '__') + '.datagrid' + '.2'\n",
79
+ "dg.save(proj_dir / 'datagrids' / dg_file_name)\n",
80
+ "kg.show()"
81
+ ]
82
+ },
83
+ {
84
+ "cell_type": "code",
85
+ "execution_count": null,
86
+ "metadata": {},
87
+ "outputs": [],
88
+ "source": []
89
+ }
90
+ ],
91
+ "metadata": {
92
+ "kernelspec": {
93
+ "display_name": "Python 3 (ipykernel)",
94
+ "language": "python",
95
+ "name": "python3"
96
+ },
97
+ "language_info": {
98
+ "codemirror_mode": {
99
+ "name": "ipython",
100
+ "version": 3
101
+ },
102
+ "file_extension": ".py",
103
+ "mimetype": "text/x-python",
104
+ "name": "python",
105
+ "nbconvert_exporter": "python",
106
+ "pygments_lexer": "ipython3",
107
+ "version": "3.9.16"
108
+ }
109
+ },
110
+ "nbformat": 4,
111
+ "nbformat_minor": 1
112
+ }
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ kangas==2.2.2
2
+ datasets==2.10.1
3
+ streamlit==1.19