Spaces:
Running
Running
File size: 7,533 Bytes
1048854 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 |
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "d5d0ea64",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<style>.container { width:95% !important; }</style>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from IPython.core.display import display, HTML, Image\n",
"display(HTML(\"<style>.container { width:95% !important; }</style>\"))\n",
"%config IPCompleter.use_jedi=False"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "403c4b8a",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from IPython.display import Markdown, display, HTML, IFrame\n",
"from facets_overview.generic_feature_statistics_generator import GenericFeatureStatisticsGenerator\n",
"import base64"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "1c48706a",
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_csv('./adult.csv')"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "b512f166",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 32561 entries, 0 to 32560\n",
"Data columns (total 15 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 age 32561 non-null int64 \n",
" 1 workclass 32561 non-null object\n",
" 2 fnlwgt 32561 non-null int64 \n",
" 3 education 32561 non-null object\n",
" 4 education.num 32561 non-null int64 \n",
" 5 marital.status 32561 non-null object\n",
" 6 occupation 32561 non-null object\n",
" 7 relationship 32561 non-null object\n",
" 8 race 32561 non-null object\n",
" 9 sex 32561 non-null object\n",
" 10 capital.gain 32561 non-null int64 \n",
" 11 capital.loss 32561 non-null int64 \n",
" 12 hours.per.week 32561 non-null int64 \n",
" 13 native.country 32561 non-null object\n",
" 14 income 32561 non-null object\n",
"dtypes: int64(6), object(9)\n",
"memory usage: 3.7+ MB\n"
]
}
],
"source": [
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "fce8e9f4",
"metadata": {},
"outputs": [
{
"ename": "TypeError",
"evalue": "string indices must be integers",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/tmp/ipykernel_28/1621212634.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mproto\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mGenericFeatureStatisticsGenerator\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mProtoFromDataFrames\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mprotostr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbase64\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mb64encode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mproto\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSerializeToString\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdecode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"utf-8\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m HTML_TEMPLATE = \"\"\"\n\u001b[1;32m 4\u001b[0m \u001b[0;34m<\u001b[0m\u001b[0mscript\u001b[0m \u001b[0msrc\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"https://cdnjs.cloudflare.com/ajax/libs/webcomponentsjs/1.3.3/webcomponents-lite.js\"\u001b[0m\u001b[0;34m>\u001b[0m\u001b[0;34m<\u001b[0m\u001b[0;34m/\u001b[0m\u001b[0mscript\u001b[0m\u001b[0;34m>\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m<\u001b[0m\u001b[0mlink\u001b[0m \u001b[0mrel\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"import\"\u001b[0m \u001b[0mhref\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"https://raw.githubusercontent.com/PAIR-code/facets/1.0.0/facets-dist/facets-jupyter.html\"\u001b[0m\u001b[0;34m>\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/opt/conda/lib/python3.9/site-packages/facets_overview/base_generic_feature_statistics_generator.py\u001b[0m in \u001b[0;36mProtoFromDataFrames\u001b[0;34m(self, dataframes, histogram_categorical_levels_count)\u001b[0m\n\u001b[1;32m 49\u001b[0m \u001b[0mdatasets\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 50\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mdataframe\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mdataframes\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 51\u001b[0;31m \u001b[0mtable\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdataframe\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'table'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 52\u001b[0m \u001b[0mtable_entries\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 53\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mcol\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mtable\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mTypeError\u001b[0m: string indices must be integers"
]
}
],
"source": [
"proto = GenericFeatureStatisticsGenerator().ProtoFromDataFrames(df)\n",
"protostr = base64.b64encode(proto.SerializeToString()).decode(\"utf-8\")\n",
"HTML_TEMPLATE = \"\"\"\n",
" <script src=\"https://cdnjs.cloudflare.com/ajax/libs/webcomponentsjs/1.3.3/webcomponents-lite.js\"></script>\n",
" <link rel=\"import\" href=\"https://raw.githubusercontent.com/PAIR-code/facets/1.0.0/facets-dist/facets-jupyter.html\">\n",
" <facets-overview id=\"elem\"></facets-overview>\n",
" <script>\n",
" document.querySelector(\"#elem\").protoInput = \"{protostr}\";\n",
" </script>\"\"\"\n",
"html_str = HTML_TEMPLATE.format(protostr=protostr)\n",
"with open(\"index.html\",'w') as fo:\n",
" fo.write(html_str)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c0a817dc",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
|