yonkasoft committed on
Commit
b8522d2
1 Parent(s): 36e19c5

Upload 4 files

Browse files
Files changed (4) hide show
  1. Dockerfile +1 -1
  2. load2.ipynb +63 -88
  3. model.py +179 -0
  4. requirements.txt +1 -1
Dockerfile CHANGED
@@ -11,4 +11,4 @@ COPY requirements.txt /deneme/requirements.txt
11
  RUN pip install --no-cache-dir --upgrade -r /deneme/requirements.txt
12
 
13
  # Uygulamanızı başlatma komutu
14
- CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
 
11
  RUN pip install --no-cache-dir --upgrade -r /deneme/requirements.txt
12
 
13
  # Uygulamanızı başlatma komutu
14
+ CMD ["uvicorn", "app:app2", "--host", "0.0.0.0", "--port", "7860"]
load2.ipynb CHANGED
@@ -9,23 +9,32 @@
9
  },
10
  {
11
  "cell_type": "code",
12
- "execution_count": 1,
13
  "metadata": {},
14
  "outputs": [
15
  {
16
- "ename": "ModuleNotFoundError",
17
- "evalue": "No module named 'datasets'",
18
  "output_type": "error",
19
  "traceback": [
20
  "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
21
- "\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
22
- "Cell \u001b[1;32mIn[1], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mdatasets\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mdatasets\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m load_dataset\n\u001b[0;32m 3\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mpandas\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mpd\u001b[39;00m \n",
23
- "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'datasets'"
 
 
 
 
 
 
 
 
 
 
24
  ]
25
  }
26
  ],
27
  "source": [
28
- "import datasets\n",
29
  "from datasets import load_dataset\n",
30
  "import pandas as pd \n"
31
  ]
@@ -111,7 +120,7 @@
111
  },
112
  {
113
  "cell_type": "code",
114
- "execution_count": 9,
115
  "metadata": {},
116
  "outputs": [
117
  {
@@ -121,7 +130,7 @@
121
  "traceback": [
122
  "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
123
  "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
124
- "Cell \u001b[1;32mIn[9], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;66;03m#train ve test dosyaları oluşturma \u001b[39;00m\n\u001b[1;32m----> 2\u001b[0m \u001b[43mtrain_df\u001b[49m\u001b[38;5;241m.\u001b[39mto_parquet(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mC:\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mgitProjects\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mdeneme\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124megitim\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mtrain_Egitim\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mmerged_train.parquet\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m 3\u001b[0m test_df\u001b[38;5;241m.\u001b[39mto_parquet(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mC:\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mgitProjects\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mdeneme\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mtest_Egitim\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mmerged_train.parquet\u001b[39m\u001b[38;5;124m'\u001b[39m)\n",
125
  "\u001b[1;31mNameError\u001b[0m: name 'train_df' is not defined"
126
  ]
127
  }
@@ -134,7 +143,7 @@
134
  },
135
  {
136
  "cell_type": "code",
137
- "execution_count": 10,
138
  "metadata": {},
139
  "outputs": [
140
  {
@@ -226,7 +235,7 @@
226
  },
227
  {
228
  "cell_type": "code",
229
- "execution_count": 13,
230
  "metadata": {},
231
  "outputs": [
232
  {
@@ -236,7 +245,7 @@
236
  "traceback": [
237
  "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
238
  "\u001b[1;31mOSError\u001b[0m Traceback (most recent call last)",
239
- "Cell \u001b[1;32mIn[13], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AutoModel,AutoTokenizer,AutoModelForSeq2SeqLM\n\u001b[0;32m 2\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m AutoTokenizer\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mphilschmid/bart-large-cnn-samsum\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 3\u001b[0m model \u001b[38;5;241m=\u001b[39m AutoModelForSeq2SeqLM\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mphilschmid/bart-large-cnn-samsum\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
240
  "File \u001b[1;32mc:\\gitProjects\\deneme\\.venv\\Lib\\site-packages\\transformers\\__init__.py:26\u001b[0m\n\u001b[0;32m 23\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtyping\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m TYPE_CHECKING\n\u001b[0;32m 25\u001b[0m \u001b[38;5;66;03m# Check the dependencies satisfy the minimal versions required.\u001b[39;00m\n\u001b[1;32m---> 26\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m dependency_versions_check\n\u001b[0;32m 27\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[0;32m 28\u001b[0m OptionalDependencyNotAvailable,\n\u001b[0;32m 29\u001b[0m _LazyModule,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 48\u001b[0m logging,\n\u001b[0;32m 49\u001b[0m )\n\u001b[0;32m 52\u001b[0m logger \u001b[38;5;241m=\u001b[39m logging\u001b[38;5;241m.\u001b[39mget_logger(\u001b[38;5;18m__name__\u001b[39m) \u001b[38;5;66;03m# pylint: disable=invalid-name\u001b[39;00m\n",
241
  "File \u001b[1;32mc:\\gitProjects\\deneme\\.venv\\Lib\\site-packages\\transformers\\dependency_versions_check.py:16\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;66;03m# Copyright 2020 The HuggingFace Team. All rights reserved.\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;66;03m#\u001b[39;00m\n\u001b[0;32m 3\u001b[0m \u001b[38;5;66;03m# Licensed under the Apache License, Version 2.0 (the \"License\");\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 12\u001b[0m \u001b[38;5;66;03m# See the License for the specific language governing permissions and\u001b[39;00m\n\u001b[0;32m 13\u001b[0m \u001b[38;5;66;03m# limitations under the License.\u001b[39;00m\n\u001b[0;32m 15\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdependency_versions_table\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m deps\n\u001b[1;32m---> 16\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mversions\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m require_version, require_version_core\n\u001b[0;32m 19\u001b[0m \u001b[38;5;66;03m# define which module versions we always want to check at run time\u001b[39;00m\n\u001b[0;32m 20\u001b[0m \u001b[38;5;66;03m# (usually the ones defined in `install_requires` in setup.py)\u001b[39;00m\n\u001b[0;32m 21\u001b[0m \u001b[38;5;66;03m#\u001b[39;00m\n\u001b[0;32m 22\u001b[0m \u001b[38;5;66;03m# order specific notes:\u001b[39;00m\n\u001b[0;32m 23\u001b[0m \u001b[38;5;66;03m# - tqdm must be checked before tokenizers\u001b[39;00m\n\u001b[0;32m 25\u001b[0m pkgs_to_check_at_runtime \u001b[38;5;241m=\u001b[39m [\n\u001b[0;32m 26\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpython\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 27\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtqdm\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 37\u001b[0m 
\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpyyaml\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 38\u001b[0m ]\n",
242
  "File \u001b[1;32mc:\\gitProjects\\deneme\\.venv\\Lib\\site-packages\\transformers\\utils\\__init__.py:34\u001b[0m\n\u001b[0;32m 25\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mconstants\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, IMAGENET_STANDARD_MEAN, IMAGENET_STANDARD_STD\n\u001b[0;32m 26\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdoc\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[0;32m 27\u001b[0m add_code_sample_docstrings,\n\u001b[0;32m 28\u001b[0m add_end_docstrings,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 32\u001b[0m replace_return_docstrings,\n\u001b[0;32m 33\u001b[0m )\n\u001b[1;32m---> 34\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mgeneric\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[0;32m 35\u001b[0m ContextManagers,\n\u001b[0;32m 36\u001b[0m ExplicitEnum,\n\u001b[0;32m 37\u001b[0m ModelOutput,\n\u001b[0;32m 38\u001b[0m PaddingStrategy,\n\u001b[0;32m 39\u001b[0m TensorType,\n\u001b[0;32m 40\u001b[0m add_model_info_to_auto_map,\n\u001b[0;32m 41\u001b[0m add_model_info_to_custom_pipelines,\n\u001b[0;32m 42\u001b[0m cached_property,\n\u001b[0;32m 43\u001b[0m can_return_loss,\n\u001b[0;32m 44\u001b[0m expand_dims,\n\u001b[0;32m 45\u001b[0m filter_out_non_signature_kwargs,\n\u001b[0;32m 46\u001b[0m find_labels,\n\u001b[0;32m 47\u001b[0m flatten_dict,\n\u001b[0;32m 48\u001b[0m infer_framework,\n\u001b[0;32m 49\u001b[0m is_jax_tensor,\n\u001b[0;32m 50\u001b[0m is_numpy_array,\n\u001b[0;32m 51\u001b[0m is_tensor,\n\u001b[0;32m 52\u001b[0m is_tf_symbolic_tensor,\n\u001b[0;32m 53\u001b[0m is_tf_tensor,\n\u001b[0;32m 54\u001b[0m is_torch_device,\n\u001b[0;32m 55\u001b[0m is_torch_dtype,\n\u001b[0;32m 56\u001b[0m is_torch_tensor,\n\u001b[0;32m 57\u001b[0m reshape,\n\u001b[0;32m 58\u001b[0m squeeze,\n\u001b[0;32m 59\u001b[0m 
strtobool,\n\u001b[0;32m 60\u001b[0m tensor_size,\n\u001b[0;32m 61\u001b[0m to_numpy,\n\u001b[0;32m 62\u001b[0m to_py_obj,\n\u001b[0;32m 63\u001b[0m torch_float,\n\u001b[0;32m 64\u001b[0m torch_int,\n\u001b[0;32m 65\u001b[0m transpose,\n\u001b[0;32m 66\u001b[0m working_or_temp_dir,\n\u001b[0;32m 67\u001b[0m )\n\u001b[0;32m 68\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mhub\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[0;32m 69\u001b[0m CLOUDFRONT_DISTRIB_PREFIX,\n\u001b[0;32m 70\u001b[0m HF_MODULES_CACHE,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 96\u001b[0m try_to_load_from_cache,\n\u001b[0;32m 97\u001b[0m )\n\u001b[0;32m 98\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mimport_utils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[0;32m 99\u001b[0m ACCELERATE_MIN_VERSION,\n\u001b[0;32m 100\u001b[0m ENV_VARS_TRUE_AND_AUTO_VALUES,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 219\u001b[0m torch_only_method,\n\u001b[0;32m 220\u001b[0m )\n",
@@ -247,9 +256,10 @@
247
  }
248
  ],
249
  "source": [
250
- "from transformers import AutoModel,AutoTokenizer,AutoModelForSeq2SeqLM\n",
251
- "tokenizer = AutoTokenizer.from_pretrained(\"philschmid/bart-large-cnn-samsum\")\n",
252
- "model = AutoModelForSeq2SeqLM.from_pretrained(\"philschmid/bart-large-cnn-samsum\")\n"
 
253
  ]
254
  },
255
  {
@@ -260,11 +270,11 @@
260
  ]
261
  },
262
  {
263
- "cell_type": "markdown",
 
264
  "metadata": {},
265
- "source": [
266
- "Train verilerinin moongodb yhe yüklenmesi "
267
- ]
268
  },
269
  {
270
  "cell_type": "code",
@@ -314,11 +324,11 @@
314
  ]
315
  },
316
  {
317
- "cell_type": "markdown",
 
318
  "metadata": {},
319
- "source": [
320
- "Test verilerinin monngodb yhe eklenmesi "
321
- ]
322
  },
323
  {
324
  "cell_type": "code",
@@ -367,70 +377,42 @@
367
  "source_collection = dataset_read()"
368
  ]
369
  },
370
- {
371
- "cell_type": "markdown",
372
- "metadata": {},
373
- "source": [
374
- "Model eğitimi için tokenleştirme "
375
- ]
376
- },
377
- {
378
- "cell_type": "code",
379
- "execution_count": null,
380
- "metadata": {},
381
- "outputs": [],
382
- "source": [
383
- "from pymongo import MongoClient\n",
384
- "\n",
385
- "def get_mongodb():\n",
386
- " # MongoDB bağlantı bilgilerini döndürecek şekilde tanımlanmalıdır.\n",
387
- " return 'mongodb://localhost:27017/', 'yeniDatabase', 'test'\n",
388
- "\n",
389
- "def get_average_prompt_token_length():\n",
390
- " # MongoDB bağlantı bilgilerini alma\n",
391
- " mongo_url, db_name, collection_name = get_mongodb()\n",
392
- "\n",
393
- " # MongoDB'ye bağlanma\n",
394
- " client = MongoClient(mongo_url)\n",
395
- " db = client[db_name]\n",
396
- " collection = db[collection_name]\n",
397
- "\n",
398
- " # Tüm dökümanları çekme ve 'prompt_token_length' alanını alma\n",
399
- " docs = collection.find({}, {'Prompt_token_length': 1})\n",
400
- "\n",
401
- " # 'prompt_token_length' değerlerini toplama ve sayma\n",
402
- " total_length = 0\n",
403
- " count = 0\n",
404
- "\n",
405
- " for doc in docs:\n",
406
- " if 'Prompt_token_length' in doc:\n",
407
- " total_length += doc['Prompt_token_length']\n",
408
- " count += 1\n",
409
- " \n",
410
- " # Ortalama hesaplama\n",
411
- " if count > 0:\n",
412
- " average_length = total_length / count\n",
413
- " else:\n",
414
- " average_length = 0 # Eğer 'prompt_token_length' alanı olan döküman yoksa\n",
415
- "\n",
416
- " return int(average_length)\n",
417
- "\n",
418
- "# Ortalama prompt token uzunluğunu al ve yazdır\n",
419
- "average_length = get_average_prompt_token_length()\n",
420
- "print(f\"Ortalama prompt token uzunluğu: {average_length}\")\n"
421
- ]
422
- },
423
  {
424
  "cell_type": "code",
425
- "execution_count": null,
426
  "metadata": {},
427
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
428
  "source": [
429
  "# uygulama için kullanılcak olan özelliklerin tanımlanması\n",
430
  "from transformers import BertTokenizer,BertForQuestionAnswering,BertConfig\n",
431
  "class QA:\n",
432
  " def __init__(self,model_path: str):\n",
433
- " self.max_seq_length = 200 #max seq\n",
434
  " self.doc_stride = 128 #stride \n",
435
  " self.do_lower_case = False\n",
436
  " self.max_query_length = 30\n",
@@ -449,18 +431,11 @@
449
  " \n",
450
  " # This function is used to load the model\n",
451
  " def load_model(self,model_path: str,do_lower_case=False):\n",
452
- " config = BertConfig.from_pretrained(model_path + \"C:\\\\gitProjects\\\\train_Egitim\")\n",
453
  " tokenizer = BertTokenizer.from_pretrained(model_path, do_lower_case=do_lower_case)\n",
454
  " model = BertForQuestionAnswering.from_pretrained(model_path, from_tf=False, config=config)\n",
455
- " return model, tokenizer"
456
  ]
457
- },
458
- {
459
- "cell_type": "code",
460
- "execution_count": null,
461
- "metadata": {},
462
- "outputs": [],
463
- "source": []
464
  }
465
  ],
466
  "metadata": {
 
9
  },
10
  {
11
  "cell_type": "code",
12
+ "execution_count": null,
13
  "metadata": {},
14
  "outputs": [
15
  {
16
+ "ename": "ValueError",
17
+ "evalue": "numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject",
18
  "output_type": "error",
19
  "traceback": [
20
  "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
21
+ "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
22
+ "Cell \u001b[1;32mIn[5], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mdatasets\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mdatasets\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m load_dataset\n\u001b[0;32m 3\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mpandas\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mpd\u001b[39;00m \n",
23
+ "File \u001b[1;32mc:\\gitProjects\\deneme\\.venv\\Lib\\site-packages\\datasets\\__init__.py:17\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;66;03m# Copyright 2020 The HuggingFace Datasets Authors and the TensorFlow Datasets Authors.\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;66;03m#\u001b[39;00m\n\u001b[0;32m 3\u001b[0m \u001b[38;5;66;03m# Licensed under the Apache License, Version 2.0 (the \"License\");\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 12\u001b[0m \u001b[38;5;66;03m# See the License for the specific language governing permissions and\u001b[39;00m\n\u001b[0;32m 13\u001b[0m \u001b[38;5;66;03m# limitations under the License.\u001b[39;00m\n\u001b[0;32m 15\u001b[0m __version__ \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m2.20.0\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m---> 17\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01marrow_dataset\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Dataset\n\u001b[0;32m 18\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01marrow_reader\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ReadInstruction\n\u001b[0;32m 19\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mbuilder\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ArrowBasedBuilder, BeamBasedBuilder, BuilderConfig, DatasetBuilder, GeneratorBasedBuilder\n",
24
+ "File \u001b[1;32mc:\\gitProjects\\deneme\\.venv\\Lib\\site-packages\\datasets\\arrow_dataset.py:59\u001b[0m\n\u001b[0;32m 57\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mfsspec\u001b[39;00m\n\u001b[0;32m 58\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mnumpy\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mnp\u001b[39;00m\n\u001b[1;32m---> 59\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mpandas\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mpd\u001b[39;00m\n\u001b[0;32m 60\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mpyarrow\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mpa\u001b[39;00m\n\u001b[0;32m 61\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mpyarrow\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcompute\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mpc\u001b[39;00m\n",
25
+ "File \u001b[1;32mc:\\gitProjects\\deneme\\.venv\\Lib\\site-packages\\pandas\\__init__.py:50\u001b[0m\n\u001b[0;32m 43\u001b[0m _module \u001b[38;5;241m=\u001b[39m _err\u001b[38;5;241m.\u001b[39mname\n\u001b[0;32m 44\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m(\n\u001b[0;32m 45\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mC extension: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m_module\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m not built. If you want to import \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 46\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpandas from the source directory, you may need to run \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 47\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mpython setup.py build_ext\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m to build the C extensions first.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 48\u001b[0m ) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m_err\u001b[39;00m\n\u001b[1;32m---> 50\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_config\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[0;32m 51\u001b[0m get_option,\n\u001b[0;32m 52\u001b[0m set_option,\n\u001b[0;32m 53\u001b[0m reset_option,\n\u001b[0;32m 54\u001b[0m describe_option,\n\u001b[0;32m 55\u001b[0m option_context,\n\u001b[0;32m 56\u001b[0m options,\n\u001b[0;32m 57\u001b[0m )\n\u001b[0;32m 59\u001b[0m \u001b[38;5;66;03m# let init-time option registration happen\u001b[39;00m\n\u001b[0;32m 60\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mpandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mconfig_init\u001b[39;00m \u001b[38;5;66;03m# pyright: ignore[reportUnusedImport] # noqa: 
F401\u001b[39;00m\n",
26
+ "File \u001b[1;32mc:\\gitProjects\\deneme\\.venv\\Lib\\site-packages\\pandas\\_config\\__init__.py:20\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;124;03mpandas._config is considered explicitly upstream of everything else in pandas,\u001b[39;00m\n\u001b[0;32m 3\u001b[0m \u001b[38;5;124;03mshould have no intra-pandas dependencies.\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 6\u001b[0m \u001b[38;5;124;03mare initialized.\u001b[39;00m\n\u001b[0;32m 7\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 8\u001b[0m __all__ \u001b[38;5;241m=\u001b[39m [\n\u001b[0;32m 9\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mconfig\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 10\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdetect_console_encoding\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 18\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mwarn_copy_on_write\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 19\u001b[0m ]\n\u001b[1;32m---> 20\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_config\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m config\n\u001b[0;32m 21\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_config\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m dates \u001b[38;5;66;03m# pyright: ignore[reportUnusedImport] # noqa: F401\u001b[39;00m\n\u001b[0;32m 22\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_config\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mconfig\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[0;32m 23\u001b[0m _global_config,\n\u001b[0;32m 24\u001b[0m describe_option,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 
29\u001b[0m set_option,\n\u001b[0;32m 30\u001b[0m )\n",
27
+ "File \u001b[1;32mc:\\gitProjects\\deneme\\.venv\\Lib\\site-packages\\pandas\\_config\\config.py:68\u001b[0m\n\u001b[0;32m 58\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtyping\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[0;32m 59\u001b[0m TYPE_CHECKING,\n\u001b[0;32m 60\u001b[0m Any,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 64\u001b[0m cast,\n\u001b[0;32m 65\u001b[0m )\n\u001b[0;32m 66\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mwarnings\u001b[39;00m\n\u001b[1;32m---> 68\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_typing\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[0;32m 69\u001b[0m F,\n\u001b[0;32m 70\u001b[0m T,\n\u001b[0;32m 71\u001b[0m )\n\u001b[0;32m 72\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutil\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_exceptions\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m find_stack_level\n\u001b[0;32m 74\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m TYPE_CHECKING:\n",
28
+ "File \u001b[1;32mc:\\gitProjects\\deneme\\.venv\\Lib\\site-packages\\pandas\\_typing.py:198\u001b[0m\n\u001b[0;32m 192\u001b[0m Frequency \u001b[38;5;241m=\u001b[39m Union[\u001b[38;5;28mstr\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mBaseOffset\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m 193\u001b[0m Axes \u001b[38;5;241m=\u001b[39m ListLike\n\u001b[0;32m 195\u001b[0m RandomState \u001b[38;5;241m=\u001b[39m Union[\n\u001b[0;32m 196\u001b[0m \u001b[38;5;28mint\u001b[39m,\n\u001b[0;32m 197\u001b[0m np\u001b[38;5;241m.\u001b[39mndarray,\n\u001b[1;32m--> 198\u001b[0m \u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrandom\u001b[49m\u001b[38;5;241m.\u001b[39mGenerator,\n\u001b[0;32m 199\u001b[0m np\u001b[38;5;241m.\u001b[39mrandom\u001b[38;5;241m.\u001b[39mBitGenerator,\n\u001b[0;32m 200\u001b[0m np\u001b[38;5;241m.\u001b[39mrandom\u001b[38;5;241m.\u001b[39mRandomState,\n\u001b[0;32m 201\u001b[0m ]\n\u001b[0;32m 203\u001b[0m \u001b[38;5;66;03m# dtypes\u001b[39;00m\n\u001b[0;32m 204\u001b[0m NpDtype \u001b[38;5;241m=\u001b[39m Union[\u001b[38;5;28mstr\u001b[39m, np\u001b[38;5;241m.\u001b[39mdtype, type_t[Union[\u001b[38;5;28mstr\u001b[39m, \u001b[38;5;28mcomplex\u001b[39m, \u001b[38;5;28mbool\u001b[39m, \u001b[38;5;28mobject\u001b[39m]]]\n",
29
+ "File \u001b[1;32mc:\\gitProjects\\deneme\\.venv\\Lib\\site-packages\\numpy\\__init__.py:351\u001b[0m, in \u001b[0;36m__getattr__\u001b[1;34m(attr)\u001b[0m\n\u001b[0;32m 350\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__dir__\u001b[39m():\n\u001b[1;32m--> 351\u001b[0m public_symbols \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mglobals\u001b[39m()\u001b[38;5;241m.\u001b[39mkeys() \u001b[38;5;241m|\u001b[39m {\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtesting\u001b[39m\u001b[38;5;124m'\u001b[39m}\n\u001b[0;32m 352\u001b[0m public_symbols \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m=\u001b[39m {\n\u001b[0;32m 353\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcore\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmatrixlib\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 354\u001b[0m \u001b[38;5;66;03m# These were moved in 1.25 and may be deprecated eventually:\u001b[39;00m\n\u001b[0;32m 355\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mModuleDeprecationWarning\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mVisibleDeprecationWarning\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 356\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mComplexWarning\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTooHardError\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAxisError\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 357\u001b[0m }\n\u001b[0;32m 358\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mlist\u001b[39m(public_symbols)\n",
30
+ "File \u001b[1;32mc:\\gitProjects\\deneme\\.venv\\Lib\\site-packages\\numpy\\random\\__init__.py:180\u001b[0m\n\u001b[0;32m 126\u001b[0m __all__ \u001b[38;5;241m=\u001b[39m [\n\u001b[0;32m 127\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mbeta\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[0;32m 128\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mbinomial\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 176\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mzipf\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[0;32m 177\u001b[0m ]\n\u001b[0;32m 179\u001b[0m \u001b[38;5;66;03m# add these for module-freeze analysis (like PyInstaller)\u001b[39;00m\n\u001b[1;32m--> 180\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m _pickle\n\u001b[0;32m 181\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m _common\n\u001b[0;32m 182\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m _bounded_integers\n",
31
+ "File \u001b[1;32mc:\\gitProjects\\deneme\\.venv\\Lib\\site-packages\\numpy\\random\\_pickle.py:1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmtrand\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m RandomState\n\u001b[0;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_philox\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Philox\n\u001b[0;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_pcg64\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m PCG64, PCG64DXSM\n",
32
+ "File \u001b[1;32mnumpy\\\\random\\\\mtrand.pyx:1\u001b[0m, in \u001b[0;36minit numpy.random.mtrand\u001b[1;34m()\u001b[0m\n",
33
+ "\u001b[1;31mValueError\u001b[0m: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject"
34
  ]
35
  }
36
  ],
37
  "source": [
 
38
  "from datasets import load_dataset\n",
39
  "import pandas as pd \n"
40
  ]
 
120
  },
121
  {
122
  "cell_type": "code",
123
+ "execution_count": 2,
124
  "metadata": {},
125
  "outputs": [
126
  {
 
130
  "traceback": [
131
  "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
132
  "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
133
+ "Cell \u001b[1;32mIn[2], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;66;03m#train ve test dosyaları oluşturma \u001b[39;00m\n\u001b[1;32m----> 2\u001b[0m \u001b[43mtrain_df\u001b[49m\u001b[38;5;241m.\u001b[39mto_parquet(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mC:\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mgitProjects\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mdeneme\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124megitim\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mtrain_Egitim\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mmerged_train.parquet\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m 3\u001b[0m test_df\u001b[38;5;241m.\u001b[39mto_parquet(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mC:\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mgitProjects\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mdeneme\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mtest_Egitim\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mmerged_train.parquet\u001b[39m\u001b[38;5;124m'\u001b[39m)\n",
134
  "\u001b[1;31mNameError\u001b[0m: name 'train_df' is not defined"
135
  ]
136
  }
 
143
  },
144
  {
145
  "cell_type": "code",
146
+ "execution_count": 3,
147
  "metadata": {},
148
  "outputs": [
149
  {
 
235
  },
236
  {
237
  "cell_type": "code",
238
+ "execution_count": 3,
239
  "metadata": {},
240
  "outputs": [
241
  {
 
245
  "traceback": [
246
  "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
247
  "\u001b[1;31mOSError\u001b[0m Traceback (most recent call last)",
248
+ "Cell \u001b[1;32mIn[3], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m T5Tokenizer, T5ForConditionalGeneration\n\u001b[0;32m 3\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m T5Tokenizer\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mgoogle/flan-t5-xxl\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 4\u001b[0m model \u001b[38;5;241m=\u001b[39m T5ForConditionalGeneration\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mgoogle/flan-t5-xxl\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
249
  "File \u001b[1;32mc:\\gitProjects\\deneme\\.venv\\Lib\\site-packages\\transformers\\__init__.py:26\u001b[0m\n\u001b[0;32m 23\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtyping\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m TYPE_CHECKING\n\u001b[0;32m 25\u001b[0m \u001b[38;5;66;03m# Check the dependencies satisfy the minimal versions required.\u001b[39;00m\n\u001b[1;32m---> 26\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m dependency_versions_check\n\u001b[0;32m 27\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[0;32m 28\u001b[0m OptionalDependencyNotAvailable,\n\u001b[0;32m 29\u001b[0m _LazyModule,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 48\u001b[0m logging,\n\u001b[0;32m 49\u001b[0m )\n\u001b[0;32m 52\u001b[0m logger \u001b[38;5;241m=\u001b[39m logging\u001b[38;5;241m.\u001b[39mget_logger(\u001b[38;5;18m__name__\u001b[39m) \u001b[38;5;66;03m# pylint: disable=invalid-name\u001b[39;00m\n",
250
  "File \u001b[1;32mc:\\gitProjects\\deneme\\.venv\\Lib\\site-packages\\transformers\\dependency_versions_check.py:16\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;66;03m# Copyright 2020 The HuggingFace Team. All rights reserved.\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;66;03m#\u001b[39;00m\n\u001b[0;32m 3\u001b[0m \u001b[38;5;66;03m# Licensed under the Apache License, Version 2.0 (the \"License\");\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 12\u001b[0m \u001b[38;5;66;03m# See the License for the specific language governing permissions and\u001b[39;00m\n\u001b[0;32m 13\u001b[0m \u001b[38;5;66;03m# limitations under the License.\u001b[39;00m\n\u001b[0;32m 15\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdependency_versions_table\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m deps\n\u001b[1;32m---> 16\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mversions\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m require_version, require_version_core\n\u001b[0;32m 19\u001b[0m \u001b[38;5;66;03m# define which module versions we always want to check at run time\u001b[39;00m\n\u001b[0;32m 20\u001b[0m \u001b[38;5;66;03m# (usually the ones defined in `install_requires` in setup.py)\u001b[39;00m\n\u001b[0;32m 21\u001b[0m \u001b[38;5;66;03m#\u001b[39;00m\n\u001b[0;32m 22\u001b[0m \u001b[38;5;66;03m# order specific notes:\u001b[39;00m\n\u001b[0;32m 23\u001b[0m \u001b[38;5;66;03m# - tqdm must be checked before tokenizers\u001b[39;00m\n\u001b[0;32m 25\u001b[0m pkgs_to_check_at_runtime \u001b[38;5;241m=\u001b[39m [\n\u001b[0;32m 26\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpython\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 27\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtqdm\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 37\u001b[0m 
\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpyyaml\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 38\u001b[0m ]\n",
251
  "File \u001b[1;32mc:\\gitProjects\\deneme\\.venv\\Lib\\site-packages\\transformers\\utils\\__init__.py:34\u001b[0m\n\u001b[0;32m 25\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mconstants\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, IMAGENET_STANDARD_MEAN, IMAGENET_STANDARD_STD\n\u001b[0;32m 26\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdoc\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[0;32m 27\u001b[0m add_code_sample_docstrings,\n\u001b[0;32m 28\u001b[0m add_end_docstrings,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 32\u001b[0m replace_return_docstrings,\n\u001b[0;32m 33\u001b[0m )\n\u001b[1;32m---> 34\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mgeneric\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[0;32m 35\u001b[0m ContextManagers,\n\u001b[0;32m 36\u001b[0m ExplicitEnum,\n\u001b[0;32m 37\u001b[0m ModelOutput,\n\u001b[0;32m 38\u001b[0m PaddingStrategy,\n\u001b[0;32m 39\u001b[0m TensorType,\n\u001b[0;32m 40\u001b[0m add_model_info_to_auto_map,\n\u001b[0;32m 41\u001b[0m add_model_info_to_custom_pipelines,\n\u001b[0;32m 42\u001b[0m cached_property,\n\u001b[0;32m 43\u001b[0m can_return_loss,\n\u001b[0;32m 44\u001b[0m expand_dims,\n\u001b[0;32m 45\u001b[0m filter_out_non_signature_kwargs,\n\u001b[0;32m 46\u001b[0m find_labels,\n\u001b[0;32m 47\u001b[0m flatten_dict,\n\u001b[0;32m 48\u001b[0m infer_framework,\n\u001b[0;32m 49\u001b[0m is_jax_tensor,\n\u001b[0;32m 50\u001b[0m is_numpy_array,\n\u001b[0;32m 51\u001b[0m is_tensor,\n\u001b[0;32m 52\u001b[0m is_tf_symbolic_tensor,\n\u001b[0;32m 53\u001b[0m is_tf_tensor,\n\u001b[0;32m 54\u001b[0m is_torch_device,\n\u001b[0;32m 55\u001b[0m is_torch_dtype,\n\u001b[0;32m 56\u001b[0m is_torch_tensor,\n\u001b[0;32m 57\u001b[0m reshape,\n\u001b[0;32m 58\u001b[0m squeeze,\n\u001b[0;32m 59\u001b[0m 
strtobool,\n\u001b[0;32m 60\u001b[0m tensor_size,\n\u001b[0;32m 61\u001b[0m to_numpy,\n\u001b[0;32m 62\u001b[0m to_py_obj,\n\u001b[0;32m 63\u001b[0m torch_float,\n\u001b[0;32m 64\u001b[0m torch_int,\n\u001b[0;32m 65\u001b[0m transpose,\n\u001b[0;32m 66\u001b[0m working_or_temp_dir,\n\u001b[0;32m 67\u001b[0m )\n\u001b[0;32m 68\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mhub\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[0;32m 69\u001b[0m CLOUDFRONT_DISTRIB_PREFIX,\n\u001b[0;32m 70\u001b[0m HF_MODULES_CACHE,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 96\u001b[0m try_to_load_from_cache,\n\u001b[0;32m 97\u001b[0m )\n\u001b[0;32m 98\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mimport_utils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[0;32m 99\u001b[0m ACCELERATE_MIN_VERSION,\n\u001b[0;32m 100\u001b[0m ENV_VARS_TRUE_AND_AUTO_VALUES,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 219\u001b[0m torch_only_method,\n\u001b[0;32m 220\u001b[0m )\n",
 
256
  }
257
  ],
258
  "source": [
259
+ "from transformers import T5Tokenizer, T5ForConditionalGeneration\n",
260
+ "\n",
261
+ "tokenizer = T5Tokenizer.from_pretrained(\"google/flan-t5-xxl\")\n",
262
+ "model = T5ForConditionalGeneration.from_pretrained(\"google/flan-t5-xxl\")"
263
  ]
264
  },
265
  {
 
270
  ]
271
  },
272
  {
273
+ "cell_type": "code",
274
+ "execution_count": null,
275
  "metadata": {},
276
+ "outputs": [],
277
+ "source": []
 
278
  },
279
  {
280
  "cell_type": "code",
 
324
  ]
325
  },
326
  {
327
+ "cell_type": "code",
328
+ "execution_count": null,
329
  "metadata": {},
330
+ "outputs": [],
331
+ "source": []
 
332
  },
333
  {
334
  "cell_type": "code",
 
377
  "source_collection = dataset_read()"
378
  ]
379
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
380
  {
381
  "cell_type": "code",
382
+ "execution_count": 1,
383
  "metadata": {},
384
+ "outputs": [
385
+ {
386
+ "name": "stderr",
387
+ "output_type": "stream",
388
+ "text": [
389
+ "c:\\gitProjects\\deneme\\.venv\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
390
+ " from .autonotebook import tqdm as notebook_tqdm\n"
391
+ ]
392
+ },
393
+ {
394
+ "ename": "OSError",
395
+ "evalue": "[WinError 126] Belirtilen modül bulunamadı. Error loading \"c:\\gitProjects\\deneme\\.venv\\Lib\\site-packages\\torch\\lib\\fbgemm.dll\" or one of its dependencies.",
396
+ "output_type": "error",
397
+ "traceback": [
398
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
399
+ "\u001b[1;31mOSError\u001b[0m Traceback (most recent call last)",
400
+ "Cell \u001b[1;32mIn[1], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;66;03m# uygulama için kullanılcak olan özelliklerin tanımlanması\u001b[39;00m\n\u001b[1;32m----> 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m BertTokenizer,BertForQuestionAnswering,BertConfig\n\u001b[0;32m 3\u001b[0m \u001b[38;5;28;01mclass\u001b[39;00m \u001b[38;5;21;01mQA\u001b[39;00m:\n\u001b[0;32m 4\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m,model_path: \u001b[38;5;28mstr\u001b[39m):\n",
401
+ "File \u001b[1;32mc:\\gitProjects\\deneme\\.venv\\Lib\\site-packages\\transformers\\__init__.py:26\u001b[0m\n\u001b[0;32m 23\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtyping\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m TYPE_CHECKING\n\u001b[0;32m 25\u001b[0m \u001b[38;5;66;03m# Check the dependencies satisfy the minimal versions required.\u001b[39;00m\n\u001b[1;32m---> 26\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m dependency_versions_check\n\u001b[0;32m 27\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[0;32m 28\u001b[0m OptionalDependencyNotAvailable,\n\u001b[0;32m 29\u001b[0m _LazyModule,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 48\u001b[0m logging,\n\u001b[0;32m 49\u001b[0m )\n\u001b[0;32m 52\u001b[0m logger \u001b[38;5;241m=\u001b[39m logging\u001b[38;5;241m.\u001b[39mget_logger(\u001b[38;5;18m__name__\u001b[39m) \u001b[38;5;66;03m# pylint: disable=invalid-name\u001b[39;00m\n",
402
+ "File \u001b[1;32mc:\\gitProjects\\deneme\\.venv\\Lib\\site-packages\\transformers\\dependency_versions_check.py:16\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;66;03m# Copyright 2020 The HuggingFace Team. All rights reserved.\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;66;03m#\u001b[39;00m\n\u001b[0;32m 3\u001b[0m \u001b[38;5;66;03m# Licensed under the Apache License, Version 2.0 (the \"License\");\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 12\u001b[0m \u001b[38;5;66;03m# See the License for the specific language governing permissions and\u001b[39;00m\n\u001b[0;32m 13\u001b[0m \u001b[38;5;66;03m# limitations under the License.\u001b[39;00m\n\u001b[0;32m 15\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdependency_versions_table\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m deps\n\u001b[1;32m---> 16\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mversions\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m require_version, require_version_core\n\u001b[0;32m 19\u001b[0m \u001b[38;5;66;03m# define which module versions we always want to check at run time\u001b[39;00m\n\u001b[0;32m 20\u001b[0m \u001b[38;5;66;03m# (usually the ones defined in `install_requires` in setup.py)\u001b[39;00m\n\u001b[0;32m 21\u001b[0m \u001b[38;5;66;03m#\u001b[39;00m\n\u001b[0;32m 22\u001b[0m \u001b[38;5;66;03m# order specific notes:\u001b[39;00m\n\u001b[0;32m 23\u001b[0m \u001b[38;5;66;03m# - tqdm must be checked before tokenizers\u001b[39;00m\n\u001b[0;32m 25\u001b[0m pkgs_to_check_at_runtime \u001b[38;5;241m=\u001b[39m [\n\u001b[0;32m 26\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpython\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 27\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtqdm\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 37\u001b[0m 
\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpyyaml\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 38\u001b[0m ]\n",
403
+ "File \u001b[1;32mc:\\gitProjects\\deneme\\.venv\\Lib\\site-packages\\transformers\\utils\\__init__.py:34\u001b[0m\n\u001b[0;32m 25\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mconstants\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, IMAGENET_STANDARD_MEAN, IMAGENET_STANDARD_STD\n\u001b[0;32m 26\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdoc\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[0;32m 27\u001b[0m add_code_sample_docstrings,\n\u001b[0;32m 28\u001b[0m add_end_docstrings,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 32\u001b[0m replace_return_docstrings,\n\u001b[0;32m 33\u001b[0m )\n\u001b[1;32m---> 34\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mgeneric\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[0;32m 35\u001b[0m ContextManagers,\n\u001b[0;32m 36\u001b[0m ExplicitEnum,\n\u001b[0;32m 37\u001b[0m ModelOutput,\n\u001b[0;32m 38\u001b[0m PaddingStrategy,\n\u001b[0;32m 39\u001b[0m TensorType,\n\u001b[0;32m 40\u001b[0m add_model_info_to_auto_map,\n\u001b[0;32m 41\u001b[0m add_model_info_to_custom_pipelines,\n\u001b[0;32m 42\u001b[0m cached_property,\n\u001b[0;32m 43\u001b[0m can_return_loss,\n\u001b[0;32m 44\u001b[0m expand_dims,\n\u001b[0;32m 45\u001b[0m filter_out_non_signature_kwargs,\n\u001b[0;32m 46\u001b[0m find_labels,\n\u001b[0;32m 47\u001b[0m flatten_dict,\n\u001b[0;32m 48\u001b[0m infer_framework,\n\u001b[0;32m 49\u001b[0m is_jax_tensor,\n\u001b[0;32m 50\u001b[0m is_numpy_array,\n\u001b[0;32m 51\u001b[0m is_tensor,\n\u001b[0;32m 52\u001b[0m is_tf_symbolic_tensor,\n\u001b[0;32m 53\u001b[0m is_tf_tensor,\n\u001b[0;32m 54\u001b[0m is_torch_device,\n\u001b[0;32m 55\u001b[0m is_torch_dtype,\n\u001b[0;32m 56\u001b[0m is_torch_tensor,\n\u001b[0;32m 57\u001b[0m reshape,\n\u001b[0;32m 58\u001b[0m squeeze,\n\u001b[0;32m 59\u001b[0m 
strtobool,\n\u001b[0;32m 60\u001b[0m tensor_size,\n\u001b[0;32m 61\u001b[0m to_numpy,\n\u001b[0;32m 62\u001b[0m to_py_obj,\n\u001b[0;32m 63\u001b[0m torch_float,\n\u001b[0;32m 64\u001b[0m torch_int,\n\u001b[0;32m 65\u001b[0m transpose,\n\u001b[0;32m 66\u001b[0m working_or_temp_dir,\n\u001b[0;32m 67\u001b[0m )\n\u001b[0;32m 68\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mhub\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[0;32m 69\u001b[0m CLOUDFRONT_DISTRIB_PREFIX,\n\u001b[0;32m 70\u001b[0m HF_MODULES_CACHE,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 96\u001b[0m try_to_load_from_cache,\n\u001b[0;32m 97\u001b[0m )\n\u001b[0;32m 98\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mimport_utils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[0;32m 99\u001b[0m ACCELERATE_MIN_VERSION,\n\u001b[0;32m 100\u001b[0m ENV_VARS_TRUE_AND_AUTO_VALUES,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 219\u001b[0m torch_only_method,\n\u001b[0;32m 220\u001b[0m )\n",
404
+ "File \u001b[1;32mc:\\gitProjects\\deneme\\.venv\\Lib\\site-packages\\transformers\\utils\\generic.py:462\u001b[0m\n\u001b[0;32m 458\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mtuple\u001b[39m(\u001b[38;5;28mself\u001b[39m[k] \u001b[38;5;28;01mfor\u001b[39;00m k \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mkeys())\n\u001b[0;32m 461\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_torch_available():\n\u001b[1;32m--> 462\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_pytree\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01m_torch_pytree\u001b[39;00m\n\u001b[0;32m 464\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_model_output_flatten\u001b[39m(output: ModelOutput) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Tuple[List[Any], \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_torch_pytree.Context\u001b[39m\u001b[38;5;124m\"\u001b[39m]:\n\u001b[0;32m 465\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mlist\u001b[39m(output\u001b[38;5;241m.\u001b[39mvalues()), \u001b[38;5;28mlist\u001b[39m(output\u001b[38;5;241m.\u001b[39mkeys())\n",
405
+ "File \u001b[1;32mc:\\gitProjects\\deneme\\.venv\\Lib\\site-packages\\torch\\__init__.py:148\u001b[0m\n\u001b[0;32m 146\u001b[0m err \u001b[38;5;241m=\u001b[39m ctypes\u001b[38;5;241m.\u001b[39mWinError(ctypes\u001b[38;5;241m.\u001b[39mget_last_error())\n\u001b[0;32m 147\u001b[0m err\u001b[38;5;241m.\u001b[39mstrerror \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m Error loading \u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdll\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m or one of its dependencies.\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m--> 148\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m err\n\u001b[0;32m 150\u001b[0m kernel32\u001b[38;5;241m.\u001b[39mSetErrorMode(prev_error_mode)\n\u001b[0;32m 153\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_preload_cuda_deps\u001b[39m(lib_folder, lib_name):\n",
406
+ "\u001b[1;31mOSError\u001b[0m: [WinError 126] Belirtilen modül bulunamadı. Error loading \"c:\\gitProjects\\deneme\\.venv\\Lib\\site-packages\\torch\\lib\\fbgemm.dll\" or one of its dependencies."
407
+ ]
408
+ }
409
+ ],
410
  "source": [
411
  "# uygulama için kullanılcak olan özelliklerin tanımlanması\n",
412
  "from transformers import BertTokenizer,BertForQuestionAnswering,BertConfig\n",
413
  "class QA:\n",
414
  " def __init__(self,model_path: str):\n",
415
+ " self.max_seq_length = 384 #max seq\n",
416
  " self.doc_stride = 128 #stride \n",
417
  " self.do_lower_case = False\n",
418
  " self.max_query_length = 30\n",
 
431
  " \n",
432
  " # This function is used to load the model\n",
433
  " def load_model(self,model_path: str,do_lower_case=False):\n",
434
+ " config = BertConfig.from_pretrained(model_path + \"C:\\\\gitProjects\\\\deneme\\\\egitim\\\\train_Egitim\")\n",
435
  " tokenizer = BertTokenizer.from_pretrained(model_path, do_lower_case=do_lower_case)\n",
436
  " model = BertForQuestionAnswering.from_pretrained(model_path, from_tf=False, config=config)\n",
437
+ " return model, tokenizer\n"
438
  ]
 
 
 
 
 
 
 
439
  }
440
  ],
441
  "metadata": {
model.py ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datasets import load_dataset
2
+ import pandas as pd
3
+ import torch
4
+ from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
5
+ from transformers import BertTokenizer, BertForQuestionAnswering, BertConfig
6
+ from pymongo import MongoClient
7
+
8
class Database:
    """Read-only helpers for the MongoDB collection that holds the training data.

    Each query opens its own ``MongoClient`` and uses it as a context manager,
    so the connection is closed as soon as the results have been read.
    """

    @staticmethod
    def get_mongodb():
        """Return the connection settings as ``(mongo_url, db_name, collection_name)``."""
        return 'mongodb://localhost:27017/', 'yeniDatabase', 'train'

    @staticmethod
    def _find_documents(query, projection):
        """Run ``find(query, projection)`` on the configured collection.

        The cursor is materialised into a list before the client is closed,
        because a cursor cannot be iterated after its client has gone away.
        """
        mongo_url, db_name, collection_name = Database.get_mongodb()
        with MongoClient(mongo_url) as client:
            collection = client[db_name][collection_name]
            return list(collection.find(query, projection))

    @staticmethod
    def get_input_texts():
        """Return every document that has a ``Prompt`` field, as ``{"Prompt": ...}`` dicts."""
        return Database._find_documents(
            {"Prompt": {"$exists": True}},
            {"Prompt": 1, "_id": 0},
        )

    @staticmethod
    def get_output_texts():
        """Return every document that has a ``Response`` field, as ``{"Response": ...}`` dicts."""
        return Database._find_documents(
            {"Response": {"$exists": True}},
            {"Response": 1, "_id": 0},
        )

    @staticmethod
    def get_average_prompt_token_length():
        """Return the mean of ``Prompt_token_length`` over the collection, truncated to ``int``.

        Documents without the field are ignored; ``0`` is returned when no
        document carries it.
        """
        mongo_url, db_name, collection_name = Database.get_mongodb()
        total_length = 0
        count = 0
        with MongoClient(mongo_url) as client:
            collection = client[db_name][collection_name]
            # Stream the cursor inside the ``with`` block so the whole
            # collection never has to be held in memory at once.
            for doc in collection.find({}, {'Prompt_token_length': 1}):
                if 'Prompt_token_length' in doc:
                    total_length += doc['Prompt_token_length']
                    count += 1
        average_length = total_length / count if count > 0 else 0
        return int(average_length)
68
+
69
# Load the tokenizer once at import time; tokenize_and_encode() reuses it.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')


def tokenize_and_encode(train_df=None, doc=None):
    """Encode every stored prompt/response pair into padded PyTorch tensors.

    Prompts and responses are read from MongoDB through ``Database`` and
    encoded together as ``(text, text_pair)`` inputs.

    Args:
        train_df: Unused. Kept (now optional) so existing callers that pass
            it keep working.
        doc: Unused. Kept (now optional) for the same reason.

    Returns:
        The tokenizer's encoding with ``return_tensors='pt'``, padded to the
        longest pair and truncated to 100 tokens.

    Raises:
        ValueError: If the number of prompts and responses differ, since the
            two lists are paired by position.
    """
    input_texts = [row["Prompt"] for row in Database.get_input_texts()]
    output_texts = [row["Response"] for row in Database.get_output_texts()]

    # NOTE(review): prompts and responses come from two independent queries,
    # so position i is only the same document if every document has both
    # fields -- confirm against the collection.
    if len(input_texts) != len(output_texts):
        raise ValueError(
            f"Prompt/Response count mismatch: {len(input_texts)} prompts "
            f"vs {len(output_texts)} responses"
        )

    # The responses must be passed as ``text_pair``. The second positional
    # parameter of ``batch_encode_plus`` is ``add_special_tokens``, so passing
    # the responses positionally there never encoded them.
    encoded = tokenizer(
        input_texts,
        text_pair=output_texts,
        padding=True,
        truncation=True,
        max_length=100,
        return_attention_mask=True,
        return_tensors='pt',
    )
    return encoded


encoded_data = tokenize_and_encode()
92
+
93
class QA:
    """Wrapper around a BERT question-answering model and its tokenizer."""

    # TODO: the hyper-parameter values below are placeholders and should be tuned.
    def __init__(self, model_path: str):
        """Load the model and tokenizer from *model_path* and put the model in eval mode.

        Args:
            model_path: Path or identifier forwarded to ``from_pretrained``.
        """
        self.max_seq_length = 384
        self.doc_stride = 128
        self.do_lower_case = False
        self.max_query_length = 30
        self.n_best_size = 3
        self.max_answer_length = 30
        self.version_2_with_negative = False
        # Forward the configured casing so the tokenizer cannot drift from
        # ``self.do_lower_case``.
        self.model, self.tokenizer = self.load_model(
            model_path, do_lower_case=self.do_lower_case
        )
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.model.to(self.device)
        self.model.eval()

    def load_model(self, model_path: str, do_lower_case=False):
        """Build the config, tokenizer and QA model from *model_path*.

        Returns:
            A ``(model, tokenizer)`` tuple.
        """
        config = BertConfig.from_pretrained(model_path)
        tokenizer = BertTokenizer.from_pretrained(model_path, do_lower_case=do_lower_case)
        model = BertForQuestionAnswering.from_pretrained(model_path, from_tf=False, config=config)
        return model, tokenizer

    def extract_features_from_dataset(self, train_df):
        """Return the largest ``len()`` among the items yielded by *train_df*.

        The same maximum is reported under both ``'max_seq_length'`` and
        ``'max_query_length'``. The input is traversed only once, so one-shot
        iterators work, and an empty input yields ``0`` for both keys instead
        of raising.

        NOTE(review): iterating a pandas DataFrame yields its column labels,
        not its rows -- confirm what callers actually pass here.
        """
        longest = max((len(example) for example in train_df), default=0)
        features = {
            'max_seq_length': longest,
            'max_query_length': longest,
        }
        return features
124
+
125
# Fetch the average prompt token length from the database and report it.
average_length = Database.get_average_prompt_token_length()
print(f"Ortalama prompt token uzunluğu: {average_length}")

# Instantiate the question-answering wrapper.
qa = QA(
    model_path="bert-base-uncased",
)
131
+
132
+
133
+ #tensor veri setini koda entegre etme
134
+
135
+ """# Tensor veri kümesi oluşturma
136
+ input_ids = encoded_data['input_ids']
137
+ attention_mask = encoded_data['attention_mask']
138
+ token_type_ids = encoded_data['token_type_ids']
139
+ labels = torch.tensor(data['Response'].tolist()) # Cevapları etiket olarak kullanın
140
+
141
+ # TensorDataset oluşturma
142
+ dataset = TensorDataset(input_ids, attention_mask, token_type_ids, labels)
143
+
144
+ # DataLoader oluşturma
145
+ batch_size = 16
146
+ dataloader = DataLoader(
147
+ dataset,
148
+ sampler=RandomSampler(dataset),
149
+ batch_size=batch_size
150
+ )"""
151
+
152
+ #modelin için epoch sayısının tanımlaması
153
+ """# Eğitim için optimizer
154
+ optimizer = AdamW(model.parameters(), lr=5e-5)
155
+
156
+ # Eğitim döngüsü
157
+ model.train()
158
+ for epoch in range(3): # Örnek olarak 3 epoch
159
+ for batch in dataloader:
160
+ input_ids, attention_mask, token_type_ids, labels = [t.to(device) for t in batch]
161
+ optimizer.zero_grad()
162
+ outputs = model(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids, start_positions=labels, end_positions=labels)
163
+ loss = outputs.loss
164
+ loss.backward()
165
+ optimizer.step()
166
+ print(f"Epoch {epoch+1} loss: {loss.item()}")"""
167
+
168
+
169
+ #sonuçların sınıflandırılması
170
+ """# Modeli değerlendirme aşamasına getirme
171
+ model.eval()
172
+
173
+ # Örnek tahmin
174
+ with torch.no_grad():
175
+ for batch in dataloader:
176
+ input_ids, attention_mask, token_type_ids, _ = [t.to(device) for t in batch]
177
+ outputs = model(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
178
+ # Çıktıları kullanarak başlık, alt başlık ve anahtar kelimeler belirleyebilirsiniz
179
+ """
requirements.txt CHANGED
@@ -2,5 +2,5 @@ gradio==4.40.0.*
2
  pymongo==4.8.0.*
3
  pandas==2.2.2.*
4
  datasets==2.20.0.*
5
- torch==2.4.0.*
6
  transformers==4.43.4.*
 
2
  pymongo==4.8.0.*
3
  pandas==2.2.2.*
4
  datasets==2.20.0.*
5
+ torch
6
  transformers==4.43.4.*