{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "7d34f1af-07e7-4320-9cc8-085bc1848b2f",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import sys\n",
    "from statistics import mean\n",
    "\n",
    "# The dataset folder is resolved two levels above the current working directory.\n",
    "dataset_dir = os.path.abspath(os.path.join(os.getcwd(), '..', '..', 'financial_dataset'))\n",
    "# Added to sys.path so modules stored in the dataset folder can be imported\n",
    "# (presumably load_test_data lives there -- confirm).\n",
    "sys.path.append(dataset_dir)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "99b69912-8c9e-49b5-abf1-229217ac5e5e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Project-local loaders; expected to resolve through the sys.path entry added above.\n",
    "from load_test_data import get_labels_df, get_texts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "53b202a1-13c9-4ba0-9cba-bf6f207af9a1",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "132 132\n",
      "92249.56060606061\n"
     ]
    }
   ],
   "source": [
    "labels_dir = dataset_dir + '/csvs/'\n",
    "df = get_labels_df(labels_dir)\n",
    "texts_dir = dataset_dir + '/txts/'\n",
    "texts = get_texts(texts_dir)\n",
    "\n",
    "# Number of label rows next to the number of loaded texts.\n",
    "print(len(df), len(texts))\n",
    "# Mean of len() over all loaded texts.\n",
    "print(mean(len(text) for text in texts))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "b1f8d856-8204-4a42-ab73-3af2ff7a728e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Label\n",
       "SELL 53.8\n",
       "HOLD 28.0\n",
       "BUY 18.2\n",
       "Name: proportion, dtype: float64"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Share of each label, expressed as a percentage.\n",
    "df.Label.value_counts(normalize=True).round(3) * 100"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "683fe2a9-6b9e-442b-a740-313482c96424",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}