{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "Kütüphanelerin Yüklenmesi" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "ename": "ModuleNotFoundError", "evalue": "No module named 'datasets'", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", "Cell \u001b[1;32mIn[1], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mdatasets\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mdatasets\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m load_dataset\n\u001b[0;32m 3\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mpandas\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mpd\u001b[39;00m \n", "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'datasets'" ] } ], "source": [ "import datasets\n", "from datasets import load_dataset\n", "import pandas as pd \n" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "ename": "OSError", "evalue": "[WinError 126] Belirtilen modül bulunamadı. Error loading \"c:\\gitProjects\\deneme\\.venv\\Lib\\site-packages\\torch\\lib\\fbgemm.dll\" or one of its dependencies.", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mOSError\u001b[0m Traceback (most recent call last)", "Cell \u001b[1;32mIn[7], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;66;03m#verileri bart ile eğitme burada koleksiyon içerisindeki veriler tanımlanmalı \u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;66;03m# Load model directly\u001b[39;00m\n\u001b[1;32m----> 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AutoModel,AutoTokenizer\n\u001b[0;32m 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (WEIGHTS_NAME, BertConfig,\n\u001b[0;32m 5\u001b[0m BertForQuestionAnswering, BertTokenizer)\n\u001b[0;32m 6\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdata\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m DataLoader, SequentialSampler, TensorDataset\n", "File \u001b[1;32mc:\\gitProjects\\deneme\\.venv\\Lib\\site-packages\\transformers\\__init__.py:26\u001b[0m\n\u001b[0;32m 23\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtyping\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m TYPE_CHECKING\n\u001b[0;32m 25\u001b[0m \u001b[38;5;66;03m# Check the dependencies satisfy the minimal versions required.\u001b[39;00m\n\u001b[1;32m---> 26\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m dependency_versions_check\n\u001b[0;32m 27\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[0;32m 28\u001b[0m OptionalDependencyNotAvailable,\n\u001b[0;32m 29\u001b[0m _LazyModule,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 48\u001b[0m logging,\n\u001b[0;32m 49\u001b[0m )\n\u001b[0;32m 52\u001b[0m logger \u001b[38;5;241m=\u001b[39m 
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
  "import datasets\n",
  "from datasets import load_dataset\n",
  "import pandas as pd\n"
 ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
  "# training the data with BART; the data inside the collection must be defined here\n",
  "# Load model directly\n",
  "from transformers import AutoModel, AutoTokenizer, AutoModelForSeq2SeqLM\n",
  "from transformers import (WEIGHTS_NAME, BertConfig,\n",
  "                          BertForQuestionAnswering, BertTokenizer)\n",
  "from torch.utils.data import DataLoader, SequentialSampler, TensorDataset\n",
  "\n",
  "#from utils import (get_answer, input_to_squad_example,squad_examples_to_features, to_list)\n",
  "import collections\n"
 ] },
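 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
  "# Optional sanity check: a torch import that fails here (for example the Windows\n",
  "# fbgemm.dll load error) usually points at a broken or mismatched torch install,\n",
  "# so verifying it on its own keeps the transformers imports easier to debug.\n",
  "import torch\n",
  "print(torch.__version__, torch.cuda.is_available())"
 ] },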
 { "cell_type": "markdown", "metadata": {}, "source": [
  "Reading the Information in the Train and Test Data Files"
 ] },
 { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [
  "# list of the files to be merged\n",
  "train_files = ['C:\\\\gitProjects\\\\oak\\\\data\\\\train-00000-of-00007.parquet',\n",
  "               'C:\\\\gitProjects\\\\oak\\\\data\\\\train-00001-of-00007.parquet',\n",
  "               'C:\\\\gitProjects\\\\oak\\\\data\\\\train-00002-of-00007.parquet',\n",
  "               'C:\\\\gitProjects\\\\oak\\\\data\\\\train-00003-of-00007.parquet',\n",
  "               'C:\\\\gitProjects\\\\oak\\\\data\\\\train-00004-of-00007.parquet']\n",
  "test_files = ['C:\\\\gitProjects\\\\oak\\\\data\\\\train-00005-of-00007.parquet',\n",
  "              'C:\\\\gitProjects\\\\oak\\\\data\\\\train-00006-of-00007.parquet']\n"
 ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
  "# load the files\n",
  "train_dfs = [pd.read_parquet(file) for file in train_files]\n",
  "test_dfs = [pd.read_parquet(file) for file in test_files]\n"
 ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
  "# concatenate the parquet files\n",
  "train_df = pd.concat(train_dfs, ignore_index=True)\n",
  "test_df = pd.concat(test_dfs, ignore_index=True)\n",
  "\n",
  "print(train_df.head())\n",
  "print(test_df.head())\n"
 ] },
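 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
  "# Hedged sanity step: report the merged row counts, and make sure the target\n",
  "# folders used by the next cell exist, since DataFrame.to_parquet does not\n",
  "# create missing directories itself.\n",
  "import os\n",
  "print(train_df.shape, test_df.shape)\n",
  "os.makedirs('C:\\\\gitProjects\\\\deneme\\\\egitim\\\\train_Egitim', exist_ok=True)\n",
  "os.makedirs('C:\\\\gitProjects\\\\deneme\\\\egitim\\\\test_Egitim', exist_ok=True)"
 ] },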
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
  "# create the train and test files\n",
  "train_df.to_parquet('C:\\\\gitProjects\\\\deneme\\\\egitim\\\\train_Egitim\\\\merged_train.parquet')\n",
  "test_df.to_parquet('C:\\\\gitProjects\\\\deneme\\\\egitim\\\\test_Egitim\\\\merged_train.parquet')\n"
 ] },
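 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
  "# Optional check, assuming pyarrow (pandas' parquet engine here) is installed:\n",
  "# inspect the written files without loading them fully.\n",
  "import pyarrow.parquet as pq\n",
  "for path in ('C:\\\\gitProjects\\\\deneme\\\\egitim\\\\train_Egitim\\\\merged_train.parquet',\n",
  "             'C:\\\\gitProjects\\\\deneme\\\\egitim\\\\test_Egitim\\\\merged_train.parquet'):\n",
  "    print(path, '->', pq.read_metadata(path).num_rows, 'rows')"
 ] },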
 { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [
  "                          Prompt_ID  \\\n",
  "0  bb26c95639b18fd88857bf0964cd1fb5   \n",
  "1  56743c1870327184e058292a34ce12a8   \n",
  "2  88aa2f72d37cb8671ff68a6f481e382b   \n",
  "3  703c086f7ffd9d8cc0497e82732860c7   \n",
  "4  a310cb6ed3f48e721473ec0525239e4e   \n",
  "\n",
  "                                              Prompt  \\\n",
  "0  What were the crucial factors that contributed...   \n",
  "1  Create a comprehensive guide to understanding ...   \n",
  "2  Explore the historical significance and impact...   \n",
  "3  How can advanced data analytics be leveraged t...   \n",
  "4  Design a comprehensive diversity training prog...   \n",
  "\n",
  "                                            Response  \\\n",
  "0  **Crucial Factors Contributing to the Success ...   \n",
  "1  ## Comprehensive Guide to Weather Front Types:...   \n",
  "2  ## The Fall of the Berlin Wall: Historical Sig...   \n",
  "3  **1. Real-Time Sentiment Analysis:**\\n\\n* Anal...   \n",
  "4  **Phase 1: Awareness and Self-Reflection**\\n\\n...   \n",
  "\n",
  "                      Category                  Subcategory  \\\n",
  "0              Voskhod program            Voskhod 1 mission   \n",
  "1            Science mnemonics          Weather front types   \n",
  "2            Political history  The Fall of the Berlin Wall   \n",
  "3                 Test matches               Data analytics   \n",
  "4  Majority–minority relations           Diversity training   \n",
  "\n",
  "   Prompt_token_length  \n",
  "0                   34  \n",
  "1                   48  \n",
  "2                   67  \n",
  "3                   78  \n",
  "4                   55  \n",
  "                          Prompt_ID  \\\n",
  "0  e75b977d9abe55f0d4b33d7ee6a77e43   \n",
  "1  da7b42506d0c24c5f1d2371e0f53b8fe   \n",
  "2  dc1e302eb77f44f32623f958bdf5b1f5   \n",
  "3  3e276bb9e578d719809b9654d710d6f5   \n",
  "4  3efc98322cc67bcf32abcf25576d6ba1   \n",
  "\n",
  "                                              Prompt  \\\n",
  "0  In the grand arena of intellectual discourse, ...   \n",
  "1  Amidst the tapestry of human knowledge, we inv...   \n",
  "2  In a world teeming with ideas and viewpoints, ...   \n",
  "3  Amidst the tapestry of human knowledge, we inv...   \n",
  "4  In the grand odyssey of intellectual discourse...   \n",
  "\n",
  "                                            Response Category Subcategory  \\\n",
  "0  In the spirit of the renowned English physicia...     None        None   \n",
  "1  Title: The Interplay of Politics and Psycholog...     None        None   \n",
  "2  Energy conservation has become a critical topi...     None        None   \n",
  "3  Title: Workplace Bullying: A Silent Epidemic\\n...     None        None   \n",
  "4  Title: The Grand Odyssey of Grito: A Historica...     None        None   \n",
  "\n",
  "   Prompt_token_length  \n",
  "0                  134  \n",
  "1                  121  \n",
  "2                  191  \n",
  "3                  128  \n",
  "4                  190  \n"
 ] } ], "source": [
  "# set the train and test paths and select the important columns from each split\n",
  "train_file_path = 'C:\\\\gitProjects\\\\deneme\\\\egitim\\\\train_Egitim\\\\merged_train.parquet'\n",
  "test_file_path = 'C:\\\\gitProjects\\\\deneme\\\\egitim\\\\test_Egitim\\\\merged_train.parquet'\n",
  "\n",
  "train_df = pd.read_parquet(train_file_path, columns=['Prompt_ID', 'Prompt', 'Response', 'Category', 'Subcategory', 'Prompt_token_length'])\n",
  "test_df = pd.read_parquet(test_file_path, columns=['Prompt_ID', 'Prompt', 'Response', 'Category', 'Subcategory', 'Prompt_token_length'])\n",
  "\n",
  "print(train_df.head())\n",
  "print(test_df.head())\n"
 ] },
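 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
  "# Quick profile of the prompt token lengths loaded above; the MongoDB-based\n",
  "# average computed further below should land near this mean.\n",
  "print(train_df['Prompt_token_length'].describe())"
 ] },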
 { "cell_type": "markdown", "metadata": {}, "source": [
  "Specifying the Model Name and Tokenizer"
 ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
  "from transformers import AutoModel, AutoTokenizer, AutoModelForSeq2SeqLM\n",
  "tokenizer = AutoTokenizer.from_pretrained(\"philschmid/bart-large-cnn-samsum\")\n",
  "model = AutoModelForSeq2SeqLM.from_pretrained(\"philschmid/bart-large-cnn-samsum\")\n"
 ] },
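 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
  "# Minimal smoke test of the summarization model; a hedged sketch in which the\n",
  "# sample text and generation settings are illustrative only.\n",
  "sample = train_df['Response'].iloc[0]\n",
  "inputs = tokenizer(sample, return_tensors='pt', truncation=True, max_length=1024)\n",
  "summary_ids = model.generate(**inputs, num_beams=4, max_length=60)\n",
  "print(tokenizer.decode(summary_ids[0], skip_special_tokens=True))"
 ] },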
 { "cell_type": "markdown", "metadata": {}, "source": [
  "Retrieving the important columns through MongoDB"
 ] },
 { "cell_type": "markdown", "metadata": {}, "source": [
  "Loading the train data into MongoDB"
 ] },
 { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Data successfully loaded into MongoDB.\n" ] } ], "source": [
  "from pymongo import MongoClient\n",
  "import pandas as pd\n",
  "\n",
  "# MongoDB connection settings\n",
  "def get_mongodb(database_name='yeniDatabase', collection_name='train', host='localhost', port=27017):\n",
  "    \"\"\"\n",
  "    MongoDB connection and collection selection\n",
  "    \"\"\"\n",
  "    client = MongoClient(f'mongodb://{host}:{port}/')\n",
  "    db = client[database_name]\n",
  "    collection = db[collection_name]\n",
  "    return collection\n",
  "\n",
  "# Function to load the dataset into MongoDB\n",
  "def dataset_read():\n",
  "    train_file_path = 'C:\\\\gitProjects\\\\deneme\\\\egitim\\\\train_Egitim\\\\merged_train.parquet'\n",
  "    data = pd.read_parquet(train_file_path, columns=['Prompt_ID', 'Prompt', 'Response', 'Category', 'Subcategory', 'Prompt_token_length'])\n",
  "    data_dict = data.to_dict(\"records\")\n",
  "\n",
  "    # Get the MongoDB collection\n",
  "    source_collection = get_mongodb(database_name='yeniDatabase', collection_name='train')  # collection for the train split\n",
  "\n",
  "    # Insert data into MongoDB\n",
  "    source_collection.insert_many(data_dict)\n",
  "\n",
  "    print(\"Data successfully loaded into MongoDB.\")\n",
  "    return source_collection\n",
  "\n",
  "# Call the function to load the dataset into MongoDB\n",
  "source_collection = dataset_read()\n"
 ] },
 { "cell_type": "markdown", "metadata": {}, "source": [
  "Loading the test data into MongoDB"
 ] },
 { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Data successfully loaded into MongoDB.\n" ] } ], "source": [
  "from pymongo import MongoClient\n",
  "import pandas as pd\n",
  "\n",
  "# MongoDB connection settings\n",
  "def get_mongodb(database_name='yeniDatabase', collection_name='test', host='localhost', port=27017):\n",
  "    \"\"\"\n",
  "    MongoDB connection and collection selection\n",
  "    \"\"\"\n",
  "    client = MongoClient(f'mongodb://{host}:{port}/')\n",
  "    db = client[database_name]\n",
  "    collection = db[collection_name]\n",
  "    return collection\n",
  "\n",
  "# Function to load the dataset into MongoDB\n",
  "def dataset_read():\n",
  "    test_file_path = 'C:\\\\gitProjects\\\\deneme\\\\egitim\\\\test_Egitim\\\\merged_train.parquet'\n",
  "    data = pd.read_parquet(test_file_path, columns=['Prompt_ID', 'Prompt', 'Response', 'Category', 'Subcategory', 'Prompt_token_length'])\n",
  "    data_dict = data.to_dict(\"records\")\n",
  "\n",
  "    # Get the MongoDB collection\n",
  "    source_collection = get_mongodb(database_name='yeniDatabase', collection_name='test')  # collection for the test split\n",
  "\n",
  "    # Insert data into MongoDB\n",
  "    source_collection.insert_many(data_dict)\n",
  "\n",
  "    print(\"Data successfully loaded into MongoDB.\")\n",
  "    return source_collection\n",
  "\n",
  "# Call the function to load the dataset into MongoDB\n",
  "source_collection = dataset_read()\n"
 ] },
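 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
  "# Hedged verification step: count the documents in each collection. Note that\n",
  "# re-running the insert cells above duplicates documents, because insert_many\n",
  "# does not upsert, so these counts also flag accidental re-runs.\n",
  "from pymongo import MongoClient\n",
  "client = MongoClient('mongodb://localhost:27017/')\n",
  "db = client['yeniDatabase']\n",
  "print('train:', db['train'].count_documents({}))\n",
  "print('test:', db['test'].count_documents({}))"
 ] },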
 { "cell_type": "markdown", "metadata": {}, "source": [
  "Tokenization for Model Training"
 ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
  "from pymongo import MongoClient\n",
  "\n",
  "def get_mongodb():\n",
  "    # Returns the MongoDB connection settings.\n",
  "    return 'mongodb://localhost:27017/', 'yeniDatabase', 'test'\n",
  "\n",
  "def get_average_prompt_token_length():\n",
  "    # Get the MongoDB connection settings\n",
  "    mongo_url, db_name, collection_name = get_mongodb()\n",
  "\n",
  "    # Connect to MongoDB\n",
  "    client = MongoClient(mongo_url)\n",
  "    db = client[db_name]\n",
  "    collection = db[collection_name]\n",
  "\n",
  "    # Fetch every document, keeping only the 'Prompt_token_length' field\n",
  "    docs = collection.find({}, {'Prompt_token_length': 1})\n",
  "\n",
  "    # Sum and count the 'Prompt_token_length' values\n",
  "    total_length = 0\n",
  "    count = 0\n",
  "    for doc in docs:\n",
  "        if 'Prompt_token_length' in doc:\n",
  "            total_length += doc['Prompt_token_length']\n",
  "            count += 1\n",
  "\n",
  "    # Compute the average\n",
  "    if count > 0:\n",
  "        average_length = total_length / count\n",
  "    else:\n",
  "        average_length = 0  # no document has a 'Prompt_token_length' field\n",
  "\n",
  "    return int(average_length)\n",
  "\n",
  "# Compute and print the average prompt token length\n",
  "average_length = get_average_prompt_token_length()\n",
  "print(f\"Average prompt token length: {average_length}\")\n"
 ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
  "# definition of the settings the QA application will use\n",
  "import torch\n",
  "from transformers import BertTokenizer, BertForQuestionAnswering, BertConfig\n",
  "\n",
  "class QA:\n",
  "    def __init__(self, model_path: str):\n",
  "        self.max_seq_length = 200   # maximum sequence length\n",
  "        self.doc_stride = 128       # stride\n",
  "        self.do_lower_case = False\n",
  "        self.max_query_length = 30\n",
  "        self.n_best_size = 3\n",
  "        self.max_answer_length = 30\n",
  "        self.version_2_with_negative = False\n",
  "        # load the model\n",
  "        self.model, self.tokenizer = self.load_model(model_path)\n",
  "        # decide which device to run on\n",
  "        if torch.cuda.is_available():\n",
  "            self.device = 'cuda'\n",
  "        else:\n",
  "            self.device = 'cpu'\n",
  "        self.model.to(self.device)\n",
  "        self.model.eval()\n",
  "\n",
  "    # This function is used to load the model\n",
  "    def load_model(self, model_path: str, do_lower_case=False):\n",
  "        config = BertConfig.from_pretrained(model_path)\n",
  "        tokenizer = BertTokenizer.from_pretrained(model_path, do_lower_case=do_lower_case)\n",
  "        model = BertForQuestionAnswering.from_pretrained(model_path, from_tf=False, config=config)\n",
  "        return model, tokenizer\n"
 ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }
 ], "metadata": { "kernelspec": { "display_name": ".venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.9" } }, "nbformat": 4, "nbformat_minor": 2 }