{ "cells": [ { "cell_type": "code", "execution_count": 4, "id": "585da432", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Number of parquet files 30\n", "Reading geclm-datasets/samples/c4/20230404_102105_00007_t8w9z_9148d7f2-97ef-4b7b-a8f0-c8c7d56cc97e\n", "Number of parquet files 30\n", "Reading geclm-datasets/samples/bigcode_python_code/20230404_102116_00007_ajvns_c18d8279-f2a7-4d9d-a6a6-eec56dd0c918\n", "Number of parquet files 30\n", "Reading geclm-datasets/samples/bigcode_python_github_issues/20230404_102127_00022_yv77i_752e8e9c-ea57-4501-91cd-02f4c8db1559\n", "Number of parquet files 30\n", "Reading geclm-datasets/samples/bigcode_python_jupyter_markdowned_clean_dedup/20230404_102137_00026_vwcg7_b323a23a-46a8-4b3c-9701-ca80f49eeb51\n", "Number of parquet files 30\n", "Reading geclm-datasets/samples/books3/20230404_102143_00027_t4kwf_1634fcdc-0f5d-456c-b1dd-4cf8dbe58f9f\n", "Number of parquet files 30\n", "Reading geclm-datasets/samples/gutenberg_raw/20230404_102215_00007_x3ntt_08915412-5ff6-43e8-b639-d7a1fffbc2bf\n", "Number of parquet files 30\n", "Reading geclm-datasets/samples/reddit_threaded/20230404_102241_00049_xj4uk_3c4761ee-2dbb-493b-ba2f-35a1da79cd45\n", "Number of parquet files 30\n", "Reading geclm-datasets/samples/enwiki_data/20230404_102246_00007_ye63c_937aaf89-540f-4957-893b-8b8def6f0c54\n", "Number of parquet files 30\n", "Reading geclm-datasets/samples/s2orc_dedup/20230404_102252_00080_6ce5q_5b5cc649-99f2-4a73-bd99-bc344ec2f3e4\n", "Number of parquet files 30\n", "Reading geclm-datasets/samples/stackexchange2/20230404_102308_00031_qvnh6_b4d3b907-0fbb-4c24-92a7-3570be065ca2\n", "Number of parquet files 30\n", "Reading geclm-datasets/samples/commoncrawl/20230404_124237_00026_sin5w_22b6f328-0e4a-4094-bd6a-399ded4036ac\n", "Running on local URL: http://127.0.0.1:7860\n", "\n", "To create a public link, set `share=True` in `launch()`.\n" ] }, { "data": { "text/html": [ "
" ], "text/plain": [ "