File size: 3,596 Bytes
e987d7b b47dcdb e987d7b 3531f81 ca00bce 3531f81 ca00bce 3531f81 ca00bce 3531f81 ca00bce 3531f81 ca00bce 3531f81 ca00bce 3531f81 ca00bce 3531f81 3eab446 3531f81 3eab446 3531f81 3eab446 3531f81 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 |
# Authenticate against the Hub and enable debug logging first, i.e. before the
# app modules further down are imported (per the original note, this has to
# happen before the schedulers are started).
import os

from huggingface_hub import logging, login

# The token is read from the HF_TOKEN environment variable (None when it is unset).
login(write_permission=True, token=os.getenv("HF_TOKEN"))
logging.set_verbosity_debug()
# Start apps
from pathlib import Path
import gradio as gr
from app_1M_image import get_demo as get_demo_1M_image
from app_image import get_demo as get_demo_image
from app_json import get_demo as get_demo_json
def _get_demo_code(path: str) -> str:
    """Return the source of a demo module rewritten as a standalone script.

    The file at ``path`` is read as text, every occurrence of
    ``def get_demo():`` is replaced by ``with gr.Blocks() as demo:`` and a
    trailing ``demo.launch()`` call is appended.

    Args:
        path: Location of the Python source file to load.

    Returns:
        The transformed source code as a single string.

    Raises:
        OSError: If the file cannot be read (e.g. it does not exist).
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Python sources are UTF-8 by default; decode explicitly instead of relying
    # on the locale's preferred encoding, which differs between hosts.
    code = Path(path).read_text(encoding="utf-8")
    code = code.replace("def get_demo():", "with gr.Blocks() as demo:")
    code += "\n\ndemo.launch()"
    return code
# Introductory Markdown/HTML for the page: a centered title followed by a short
# description of the demo and a link to the related `huggingface_hub` guide.
# It is rendered with `gr.Markdown` as the first element of the Blocks layout.
DEMO_EXPLANATION = """
<h1 style='text-align: center; margin-bottom: 1rem'> How to persist data from a Space to a Dataset? </h1>
This demo shows how to leverage `gradio` and `huggingface_hub` to save data from a Space to a Dataset on the Hub.
When doing so, a few things must be taken care of: file formats, concurrent writes, name collision, number of commits,
number of files, and more. The tabs below show different ways of implementing a "save to dataset" feature. Depending on the
complexity and usage of your app, you might want to use one or the other.
This Space comes as a demo for this `huggingface_hub` [guide](https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#scheduled-uploads). Please check it out if you need more technical details.
"""
# Markdown shown at the top of the "JSON Dataset" tab: use case, supported data,
# robustness and limitations. It ends with a "## Demo" heading because the live
# demo is rendered right after it.
JSON_DEMO_EXPLANATION = """
## Use case
- Save inputs and outputs
- Build an annotation platform
## Data
Json-able only: text and numeric but no binaries.
## Robustness
Works with concurrent users and replicas.
## Limitations
If you expect millions of lines, you must split the local JSON file into multiple files to avoid getting your file tracked as LFS (5MB) on the Hub.
## Demo
"""
# Markdown shown at the top of the "Image Dataset" tab: use case, robustness and
# the per-folder / per-repo file-count limitations. It ends with a "## Demo"
# heading because the live demo is rendered right after it.
IMAGE_DEMO_EXPLANATION = """
## Use case
Save images with metadata (caption, parameters, datetime, etc.).
## Robustness
Works with concurrent users and replicas.
## Limitations
- only 10k images/folder are supported on the Hub. If you expect more usage, you must save data in subfolders.
- only 1M images/repo supported on the Hub. If you expect more usage, you can zip your data before uploading. See the _1M images Dataset_ demo.
## Demo
"""
# Markdown shown at the top of the "1M images Dataset" tab: use case, robustness
# and limitations. It ends with a "## Demo" heading because the live demo is
# rendered right after it.
IMAGE_1M_DEMO_EXPLANATION = """
## Use case:
Save 1M images with metadata (caption, parameters, datetime, etc.).
## Robustness
Works with concurrent users and replicas.
## Limitations
None.
## Demo
"""
def _render_tab(title, explanation, build_demo, result_markdown, source_file):
    """Render one tab of the page.

    Each tab contains, in order: the explanation text, the live demo produced
    by ``build_demo()``, the "Result"/"Code" Markdown and an open accordion
    showing the transformed source of ``source_file``.
    """
    with gr.Tab(title):
        gr.Markdown(explanation)
        build_demo()
        gr.Markdown(result_markdown)
        with gr.Accordion("Source code", open=True):
            gr.Code(_get_demo_code(source_file), language="python")


# Page layout: a general introduction followed by one tab per saving strategy.
with gr.Blocks() as demo:
    gr.Markdown(DEMO_EXPLANATION)

    _render_tab(
        "JSON Dataset",
        JSON_DEMO_EXPLANATION,
        get_demo_json,
        "## Result\n\nhttps://huggingface.co/datasets/Wauplin/example-space-to-dataset-json\n\n## Code",
        "app_json.py",
    )
    _render_tab(
        "Image Dataset",
        IMAGE_DEMO_EXPLANATION,
        get_demo_image,
        "## Result\n\nhttps://huggingface.co/datasets/Wauplin/example-space-to-dataset-image\n\n## Code",
        "app_image.py",
    )
    _render_tab(
        "1M images Dataset",
        IMAGE_1M_DEMO_EXPLANATION,
        get_demo_1M_image,
        "## Result\n\nhttps://huggingface.co/datasets/Wauplin/example-space-to-dataset-image-zip\n\n## Code",
        "app_1M_image.py",
    )

demo.launch()
|