Ben Burtenshaw
committed on
Commit
•
dfd3683
1
Parent(s):
32014a1
lose codeless version
Browse files
- defaults.py +1 -1
- hub.py +23 -1
- pages/3_🌱 Generate Dataset.py +29 -10
defaults.py
CHANGED
@@ -3,7 +3,7 @@ import json
|
|
3 |
|
4 |
SEED_DATA_PATH = "seed_data.json"
|
5 |
PIPELINE_PATH = "pipeline.yaml"
|
6 |
-
REMOTE_CODE_PATHS = ["
|
7 |
DIBT_PARENT_APP_URL = "https://argilla-domain-specific-datasets-welcome.hf.space/"
|
8 |
N_PERSPECTIVES = 5
|
9 |
N_TOPICS = 5
|
|
|
3 |
|
4 |
SEED_DATA_PATH = "seed_data.json"
|
5 |
PIPELINE_PATH = "pipeline.yaml"
|
6 |
+
REMOTE_CODE_PATHS = ["requirements.txt"]
|
7 |
DIBT_PARENT_APP_URL = "https://argilla-domain-specific-datasets-welcome.hf.space/"
|
8 |
N_PERSPECTIVES = 5
|
9 |
N_TOPICS = 5
|
hub.py
CHANGED
@@ -94,7 +94,7 @@ def push_pipeline_to_hub(
|
|
94 |
# upload the pipeline to the hub
|
95 |
hf_api.upload_file(
|
96 |
path_or_fileobj=pipeline_path,
|
97 |
-
path_in_repo="pipeline.
|
98 |
token=hub_token,
|
99 |
repo_id=repo_id,
|
100 |
repo_type="dataset",
|
@@ -127,3 +127,25 @@ def push_argilla_dataset_to_hub(
|
|
127 |
feedback_dataset = rg.FeedbackDataset.from_argilla(name=name, workspace=workspace)
|
128 |
local_dataset = feedback_dataset.pull()
|
129 |
local_dataset.push_to_huggingface(repo_id=repo_id)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
# upload the pipeline to the hub
|
95 |
hf_api.upload_file(
|
96 |
path_or_fileobj=pipeline_path,
|
97 |
+
path_in_repo="pipeline.py",
|
98 |
token=hub_token,
|
99 |
repo_id=repo_id,
|
100 |
repo_type="dataset",
|
|
|
127 |
feedback_dataset = rg.FeedbackDataset.from_argilla(name=name, workspace=workspace)
|
128 |
local_dataset = feedback_dataset.pull()
|
129 |
local_dataset.push_to_huggingface(repo_id=repo_id)
|
130 |
+
|
131 |
+
|
132 |
+
def push_pipeline_params(
|
133 |
+
pipeline_params,
|
134 |
+
hub_username,
|
135 |
+
hub_token: str,
|
136 |
+
project_name,
|
137 |
+
):
|
138 |
+
repo_id = f"{hub_username}/{project_name}"
|
139 |
+
temp_path = mktemp()
|
140 |
+
with open(temp_path, "w") as f:
|
141 |
+
json.dump(pipeline_params, f)
|
142 |
+
# upload the pipeline to the hub
|
143 |
+
hf_api.upload_file(
|
144 |
+
path_or_fileobj=temp_path,
|
145 |
+
path_in_repo="pipeline_params.json",
|
146 |
+
token=hub_token,
|
147 |
+
repo_id=repo_id,
|
148 |
+
repo_type="dataset",
|
149 |
+
)
|
150 |
+
|
151 |
+
print(f"Pipeline params uploaded to {repo_id}")
|
pages/3_🌱 Generate Dataset.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
import streamlit as st
|
2 |
|
3 |
from defaults import ARGILLA_URL
|
|
|
4 |
from utils import project_sidebar
|
5 |
|
6 |
st.set_page_config(
|
@@ -90,6 +91,25 @@ if all(
|
|
90 |
argilla_dataset_name,
|
91 |
]
|
92 |
):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
93 |
st.markdown(
|
94 |
"To run the pipeline locally, you need to have the `distilabel` library installed. You can install it using the following command:"
|
95 |
)
|
@@ -106,19 +126,18 @@ if all(
|
|
106 |
|
107 |
st.code(
|
108 |
f"""
|
109 |
-
# Clone the project and install the requirements
|
110 |
git clone https://huggingface.co/datasets/{hub_username}/{project_name}
|
111 |
cd {project_name}
|
112 |
pip install -r requirements.txt
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
""",
|
122 |
language="bash",
|
123 |
)
|
124 |
st.markdown(
|
|
|
1 |
import streamlit as st
|
2 |
|
3 |
from defaults import ARGILLA_URL
|
4 |
+
from hub import push_pipeline_params, push_pipeline_to_hub
|
5 |
from utils import project_sidebar
|
6 |
|
7 |
st.set_page_config(
|
|
|
91 |
argilla_dataset_name,
|
92 |
]
|
93 |
):
|
94 |
+
push_pipeline_params(
|
95 |
+
pipeline_params={
|
96 |
+
"argilla_api_key": argilla_api_key,
|
97 |
+
"argilla_api_url": argilla_url,
|
98 |
+
"argilla_dataset_name": argilla_dataset_name,
|
99 |
+
"endpoint_base_url": base_url,
|
100 |
+
},
|
101 |
+
hub_username=hub_username,
|
102 |
+
hub_token=hub_token,
|
103 |
+
project_name=project_name,
|
104 |
+
)
|
105 |
+
|
106 |
+
push_pipeline_to_hub(
|
107 |
+
pipeline_path="pipeline.py",
|
108 |
+
hub_username=hub_username,
|
109 |
+
hub_token=hub_token,
|
110 |
+
project_name=project_name,
|
111 |
+
)
|
112 |
+
|
113 |
st.markdown(
|
114 |
"To run the pipeline locally, you need to have the `distilabel` library installed. You can install it using the following command:"
|
115 |
)
|
|
|
126 |
|
127 |
st.code(
|
128 |
f"""
|
|
|
129 |
git clone https://huggingface.co/datasets/{hub_username}/{project_name}
|
130 |
cd {project_name}
|
131 |
pip install -r requirements.txt
|
132 |
+
"""
|
133 |
+
)
|
134 |
+
|
135 |
+
st.markdown("Finally, you can run the pipeline using the following command:")
|
136 |
+
|
137 |
+
st.code(
|
138 |
+
"""
|
139 |
+
huggingface-cli login
|
140 |
+
python pipeline.py""",
|
141 |
language="bash",
|
142 |
)
|
143 |
st.markdown(
|