Spaces:
Build error
Build error
Upload 4 files
Browse files- Dockerfile +1 -1
- load2.ipynb +63 -88
- model.py +179 -0
- requirements.txt +1 -1
Dockerfile
CHANGED
@@ -11,4 +11,4 @@ COPY requirements.txt /deneme/requirements.txt
|
|
11 |
RUN pip install --no-cache-dir --upgrade -r /deneme/requirements.txt
|
12 |
|
13 |
# Uygulamanızı başlatma komutu
|
14 |
-
CMD ["uvicorn", "app:
|
|
|
11 |
RUN pip install --no-cache-dir --upgrade -r /deneme/requirements.txt
|
12 |
|
13 |
# Uygulamanızı başlatma komutu
|
14 |
+
CMD ["uvicorn", "app:app2", "--host", "0.0.0.0", "--port", "7860"]
|
load2.ipynb
CHANGED
@@ -9,23 +9,32 @@
|
|
9 |
},
|
10 |
{
|
11 |
"cell_type": "code",
|
12 |
-
"execution_count":
|
13 |
"metadata": {},
|
14 |
"outputs": [
|
15 |
{
|
16 |
-
"ename": "
|
17 |
-
"evalue": "
|
18 |
"output_type": "error",
|
19 |
"traceback": [
|
20 |
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
21 |
-
"\u001b[1;
|
22 |
-
"Cell \u001b[1;32mIn[
|
23 |
-
"\u001b[1;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
]
|
25 |
}
|
26 |
],
|
27 |
"source": [
|
28 |
-
"import datasets\n",
|
29 |
"from datasets import load_dataset\n",
|
30 |
"import pandas as pd \n"
|
31 |
]
|
@@ -111,7 +120,7 @@
|
|
111 |
},
|
112 |
{
|
113 |
"cell_type": "code",
|
114 |
-
"execution_count":
|
115 |
"metadata": {},
|
116 |
"outputs": [
|
117 |
{
|
@@ -121,7 +130,7 @@
|
|
121 |
"traceback": [
|
122 |
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
123 |
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
|
124 |
-
"Cell \u001b[1;32mIn[
|
125 |
"\u001b[1;31mNameError\u001b[0m: name 'train_df' is not defined"
|
126 |
]
|
127 |
}
|
@@ -134,7 +143,7 @@
|
|
134 |
},
|
135 |
{
|
136 |
"cell_type": "code",
|
137 |
-
"execution_count":
|
138 |
"metadata": {},
|
139 |
"outputs": [
|
140 |
{
|
@@ -226,7 +235,7 @@
|
|
226 |
},
|
227 |
{
|
228 |
"cell_type": "code",
|
229 |
-
"execution_count":
|
230 |
"metadata": {},
|
231 |
"outputs": [
|
232 |
{
|
@@ -236,7 +245,7 @@
|
|
236 |
"traceback": [
|
237 |
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
238 |
"\u001b[1;31mOSError\u001b[0m Traceback (most recent call last)",
|
239 |
-
"Cell \u001b[1;32mIn[
|
240 |
"File \u001b[1;32mc:\\gitProjects\\deneme\\.venv\\Lib\\site-packages\\transformers\\__init__.py:26\u001b[0m\n\u001b[0;32m 23\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtyping\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m TYPE_CHECKING\n\u001b[0;32m 25\u001b[0m \u001b[38;5;66;03m# Check the dependencies satisfy the minimal versions required.\u001b[39;00m\n\u001b[1;32m---> 26\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m dependency_versions_check\n\u001b[0;32m 27\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[0;32m 28\u001b[0m OptionalDependencyNotAvailable,\n\u001b[0;32m 29\u001b[0m _LazyModule,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 48\u001b[0m logging,\n\u001b[0;32m 49\u001b[0m )\n\u001b[0;32m 52\u001b[0m logger \u001b[38;5;241m=\u001b[39m logging\u001b[38;5;241m.\u001b[39mget_logger(\u001b[38;5;18m__name__\u001b[39m) \u001b[38;5;66;03m# pylint: disable=invalid-name\u001b[39;00m\n",
|
241 |
"File \u001b[1;32mc:\\gitProjects\\deneme\\.venv\\Lib\\site-packages\\transformers\\dependency_versions_check.py:16\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;66;03m# Copyright 2020 The HuggingFace Team. All rights reserved.\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;66;03m#\u001b[39;00m\n\u001b[0;32m 3\u001b[0m \u001b[38;5;66;03m# Licensed under the Apache License, Version 2.0 (the \"License\");\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 12\u001b[0m \u001b[38;5;66;03m# See the License for the specific language governing permissions and\u001b[39;00m\n\u001b[0;32m 13\u001b[0m \u001b[38;5;66;03m# limitations under the License.\u001b[39;00m\n\u001b[0;32m 15\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdependency_versions_table\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m deps\n\u001b[1;32m---> 16\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mversions\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m require_version, require_version_core\n\u001b[0;32m 19\u001b[0m \u001b[38;5;66;03m# define which module versions we always want to check at run time\u001b[39;00m\n\u001b[0;32m 20\u001b[0m \u001b[38;5;66;03m# (usually the ones defined in `install_requires` in setup.py)\u001b[39;00m\n\u001b[0;32m 21\u001b[0m \u001b[38;5;66;03m#\u001b[39;00m\n\u001b[0;32m 22\u001b[0m \u001b[38;5;66;03m# order specific notes:\u001b[39;00m\n\u001b[0;32m 23\u001b[0m \u001b[38;5;66;03m# - tqdm must be checked before tokenizers\u001b[39;00m\n\u001b[0;32m 25\u001b[0m pkgs_to_check_at_runtime \u001b[38;5;241m=\u001b[39m [\n\u001b[0;32m 26\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpython\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 27\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtqdm\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 37\u001b[0m 
\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpyyaml\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 38\u001b[0m ]\n",
|
242 |
"File \u001b[1;32mc:\\gitProjects\\deneme\\.venv\\Lib\\site-packages\\transformers\\utils\\__init__.py:34\u001b[0m\n\u001b[0;32m 25\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mconstants\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, IMAGENET_STANDARD_MEAN, IMAGENET_STANDARD_STD\n\u001b[0;32m 26\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdoc\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[0;32m 27\u001b[0m add_code_sample_docstrings,\n\u001b[0;32m 28\u001b[0m add_end_docstrings,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 32\u001b[0m replace_return_docstrings,\n\u001b[0;32m 33\u001b[0m )\n\u001b[1;32m---> 34\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mgeneric\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[0;32m 35\u001b[0m ContextManagers,\n\u001b[0;32m 36\u001b[0m ExplicitEnum,\n\u001b[0;32m 37\u001b[0m ModelOutput,\n\u001b[0;32m 38\u001b[0m PaddingStrategy,\n\u001b[0;32m 39\u001b[0m TensorType,\n\u001b[0;32m 40\u001b[0m add_model_info_to_auto_map,\n\u001b[0;32m 41\u001b[0m add_model_info_to_custom_pipelines,\n\u001b[0;32m 42\u001b[0m cached_property,\n\u001b[0;32m 43\u001b[0m can_return_loss,\n\u001b[0;32m 44\u001b[0m expand_dims,\n\u001b[0;32m 45\u001b[0m filter_out_non_signature_kwargs,\n\u001b[0;32m 46\u001b[0m find_labels,\n\u001b[0;32m 47\u001b[0m flatten_dict,\n\u001b[0;32m 48\u001b[0m infer_framework,\n\u001b[0;32m 49\u001b[0m is_jax_tensor,\n\u001b[0;32m 50\u001b[0m is_numpy_array,\n\u001b[0;32m 51\u001b[0m is_tensor,\n\u001b[0;32m 52\u001b[0m is_tf_symbolic_tensor,\n\u001b[0;32m 53\u001b[0m is_tf_tensor,\n\u001b[0;32m 54\u001b[0m is_torch_device,\n\u001b[0;32m 55\u001b[0m is_torch_dtype,\n\u001b[0;32m 56\u001b[0m is_torch_tensor,\n\u001b[0;32m 57\u001b[0m reshape,\n\u001b[0;32m 58\u001b[0m squeeze,\n\u001b[0;32m 59\u001b[0m 
strtobool,\n\u001b[0;32m 60\u001b[0m tensor_size,\n\u001b[0;32m 61\u001b[0m to_numpy,\n\u001b[0;32m 62\u001b[0m to_py_obj,\n\u001b[0;32m 63\u001b[0m torch_float,\n\u001b[0;32m 64\u001b[0m torch_int,\n\u001b[0;32m 65\u001b[0m transpose,\n\u001b[0;32m 66\u001b[0m working_or_temp_dir,\n\u001b[0;32m 67\u001b[0m )\n\u001b[0;32m 68\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mhub\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[0;32m 69\u001b[0m CLOUDFRONT_DISTRIB_PREFIX,\n\u001b[0;32m 70\u001b[0m HF_MODULES_CACHE,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 96\u001b[0m try_to_load_from_cache,\n\u001b[0;32m 97\u001b[0m )\n\u001b[0;32m 98\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mimport_utils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[0;32m 99\u001b[0m ACCELERATE_MIN_VERSION,\n\u001b[0;32m 100\u001b[0m ENV_VARS_TRUE_AND_AUTO_VALUES,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 219\u001b[0m torch_only_method,\n\u001b[0;32m 220\u001b[0m )\n",
|
@@ -247,9 +256,10 @@
|
|
247 |
}
|
248 |
],
|
249 |
"source": [
|
250 |
-
"from transformers import
|
251 |
-
"
|
252 |
-
"
|
|
|
253 |
]
|
254 |
},
|
255 |
{
|
@@ -260,11 +270,11 @@
|
|
260 |
]
|
261 |
},
|
262 |
{
|
263 |
-
"cell_type": "
|
|
|
264 |
"metadata": {},
|
265 |
-
"
|
266 |
-
|
267 |
-
]
|
268 |
},
|
269 |
{
|
270 |
"cell_type": "code",
|
@@ -314,11 +324,11 @@
|
|
314 |
]
|
315 |
},
|
316 |
{
|
317 |
-
"cell_type": "
|
|
|
318 |
"metadata": {},
|
319 |
-
"
|
320 |
-
|
321 |
-
]
|
322 |
},
|
323 |
{
|
324 |
"cell_type": "code",
|
@@ -367,70 +377,42 @@
|
|
367 |
"source_collection = dataset_read()"
|
368 |
]
|
369 |
},
|
370 |
-
{
|
371 |
-
"cell_type": "markdown",
|
372 |
-
"metadata": {},
|
373 |
-
"source": [
|
374 |
-
"Model eğitimi için tokenleştirme "
|
375 |
-
]
|
376 |
-
},
|
377 |
-
{
|
378 |
-
"cell_type": "code",
|
379 |
-
"execution_count": null,
|
380 |
-
"metadata": {},
|
381 |
-
"outputs": [],
|
382 |
-
"source": [
|
383 |
-
"from pymongo import MongoClient\n",
|
384 |
-
"\n",
|
385 |
-
"def get_mongodb():\n",
|
386 |
-
" # MongoDB bağlantı bilgilerini döndürecek şekilde tanımlanmalıdır.\n",
|
387 |
-
" return 'mongodb://localhost:27017/', 'yeniDatabase', 'test'\n",
|
388 |
-
"\n",
|
389 |
-
"def get_average_prompt_token_length():\n",
|
390 |
-
" # MongoDB bağlantı bilgilerini alma\n",
|
391 |
-
" mongo_url, db_name, collection_name = get_mongodb()\n",
|
392 |
-
"\n",
|
393 |
-
" # MongoDB'ye bağlanma\n",
|
394 |
-
" client = MongoClient(mongo_url)\n",
|
395 |
-
" db = client[db_name]\n",
|
396 |
-
" collection = db[collection_name]\n",
|
397 |
-
"\n",
|
398 |
-
" # Tüm dökümanları çekme ve 'prompt_token_length' alanını alma\n",
|
399 |
-
" docs = collection.find({}, {'Prompt_token_length': 1})\n",
|
400 |
-
"\n",
|
401 |
-
" # 'prompt_token_length' değerlerini toplama ve sayma\n",
|
402 |
-
" total_length = 0\n",
|
403 |
-
" count = 0\n",
|
404 |
-
"\n",
|
405 |
-
" for doc in docs:\n",
|
406 |
-
" if 'Prompt_token_length' in doc:\n",
|
407 |
-
" total_length += doc['Prompt_token_length']\n",
|
408 |
-
" count += 1\n",
|
409 |
-
" \n",
|
410 |
-
" # Ortalama hesaplama\n",
|
411 |
-
" if count > 0:\n",
|
412 |
-
" average_length = total_length / count\n",
|
413 |
-
" else:\n",
|
414 |
-
" average_length = 0 # Eğer 'prompt_token_length' alanı olan döküman yoksa\n",
|
415 |
-
"\n",
|
416 |
-
" return int(average_length)\n",
|
417 |
-
"\n",
|
418 |
-
"# Ortalama prompt token uzunluğunu al ve yazdır\n",
|
419 |
-
"average_length = get_average_prompt_token_length()\n",
|
420 |
-
"print(f\"Ortalama prompt token uzunluğu: {average_length}\")\n"
|
421 |
-
]
|
422 |
-
},
|
423 |
{
|
424 |
"cell_type": "code",
|
425 |
-
"execution_count":
|
426 |
"metadata": {},
|
427 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
428 |
"source": [
|
429 |
"# uygulama için kullanılcak olan özelliklerin tanımlanması\n",
|
430 |
"from transformers import BertTokenizer,BertForQuestionAnswering,BertConfig\n",
|
431 |
"class QA:\n",
|
432 |
" def __init__(self,model_path: str):\n",
|
433 |
-
" self.max_seq_length =
|
434 |
" self.doc_stride = 128 #stride \n",
|
435 |
" self.do_lower_case = False\n",
|
436 |
" self.max_query_length = 30\n",
|
@@ -449,18 +431,11 @@
|
|
449 |
" \n",
|
450 |
" # This function is used to load the model\n",
|
451 |
" def load_model(self,model_path: str,do_lower_case=False):\n",
|
452 |
-
" config = BertConfig.from_pretrained(model_path + \"C:\\\\gitProjects\\\\train_Egitim\")\n",
|
453 |
" tokenizer = BertTokenizer.from_pretrained(model_path, do_lower_case=do_lower_case)\n",
|
454 |
" model = BertForQuestionAnswering.from_pretrained(model_path, from_tf=False, config=config)\n",
|
455 |
-
" return model, tokenizer"
|
456 |
]
|
457 |
-
},
|
458 |
-
{
|
459 |
-
"cell_type": "code",
|
460 |
-
"execution_count": null,
|
461 |
-
"metadata": {},
|
462 |
-
"outputs": [],
|
463 |
-
"source": []
|
464 |
}
|
465 |
],
|
466 |
"metadata": {
|
|
|
9 |
},
|
10 |
{
|
11 |
"cell_type": "code",
|
12 |
+
"execution_count": null,
|
13 |
"metadata": {},
|
14 |
"outputs": [
|
15 |
{
|
16 |
+
"ename": "ValueError",
|
17 |
+
"evalue": "numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject",
|
18 |
"output_type": "error",
|
19 |
"traceback": [
|
20 |
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
21 |
+
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
|
22 |
+
"Cell \u001b[1;32mIn[5], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mdatasets\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mdatasets\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m load_dataset\n\u001b[0;32m 3\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mpandas\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mpd\u001b[39;00m \n",
|
23 |
+
"File \u001b[1;32mc:\\gitProjects\\deneme\\.venv\\Lib\\site-packages\\datasets\\__init__.py:17\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;66;03m# Copyright 2020 The HuggingFace Datasets Authors and the TensorFlow Datasets Authors.\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;66;03m#\u001b[39;00m\n\u001b[0;32m 3\u001b[0m \u001b[38;5;66;03m# Licensed under the Apache License, Version 2.0 (the \"License\");\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 12\u001b[0m \u001b[38;5;66;03m# See the License for the specific language governing permissions and\u001b[39;00m\n\u001b[0;32m 13\u001b[0m \u001b[38;5;66;03m# limitations under the License.\u001b[39;00m\n\u001b[0;32m 15\u001b[0m __version__ \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m2.20.0\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m---> 17\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01marrow_dataset\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Dataset\n\u001b[0;32m 18\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01marrow_reader\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ReadInstruction\n\u001b[0;32m 19\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mbuilder\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ArrowBasedBuilder, BeamBasedBuilder, BuilderConfig, DatasetBuilder, GeneratorBasedBuilder\n",
|
24 |
+
"File \u001b[1;32mc:\\gitProjects\\deneme\\.venv\\Lib\\site-packages\\datasets\\arrow_dataset.py:59\u001b[0m\n\u001b[0;32m 57\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mfsspec\u001b[39;00m\n\u001b[0;32m 58\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mnumpy\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mnp\u001b[39;00m\n\u001b[1;32m---> 59\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mpandas\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mpd\u001b[39;00m\n\u001b[0;32m 60\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mpyarrow\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mpa\u001b[39;00m\n\u001b[0;32m 61\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mpyarrow\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcompute\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mpc\u001b[39;00m\n",
|
25 |
+
"File \u001b[1;32mc:\\gitProjects\\deneme\\.venv\\Lib\\site-packages\\pandas\\__init__.py:50\u001b[0m\n\u001b[0;32m 43\u001b[0m _module \u001b[38;5;241m=\u001b[39m _err\u001b[38;5;241m.\u001b[39mname\n\u001b[0;32m 44\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m(\n\u001b[0;32m 45\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mC extension: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m_module\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m not built. If you want to import \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 46\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpandas from the source directory, you may need to run \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 47\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mpython setup.py build_ext\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m to build the C extensions first.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 48\u001b[0m ) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m_err\u001b[39;00m\n\u001b[1;32m---> 50\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_config\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[0;32m 51\u001b[0m get_option,\n\u001b[0;32m 52\u001b[0m set_option,\n\u001b[0;32m 53\u001b[0m reset_option,\n\u001b[0;32m 54\u001b[0m describe_option,\n\u001b[0;32m 55\u001b[0m option_context,\n\u001b[0;32m 56\u001b[0m options,\n\u001b[0;32m 57\u001b[0m )\n\u001b[0;32m 59\u001b[0m \u001b[38;5;66;03m# let init-time option registration happen\u001b[39;00m\n\u001b[0;32m 60\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mpandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mconfig_init\u001b[39;00m \u001b[38;5;66;03m# pyright: ignore[reportUnusedImport] # noqa: 
F401\u001b[39;00m\n",
|
26 |
+
"File \u001b[1;32mc:\\gitProjects\\deneme\\.venv\\Lib\\site-packages\\pandas\\_config\\__init__.py:20\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;124;03mpandas._config is considered explicitly upstream of everything else in pandas,\u001b[39;00m\n\u001b[0;32m 3\u001b[0m \u001b[38;5;124;03mshould have no intra-pandas dependencies.\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 6\u001b[0m \u001b[38;5;124;03mare initialized.\u001b[39;00m\n\u001b[0;32m 7\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 8\u001b[0m __all__ \u001b[38;5;241m=\u001b[39m [\n\u001b[0;32m 9\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mconfig\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 10\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdetect_console_encoding\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 18\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mwarn_copy_on_write\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 19\u001b[0m ]\n\u001b[1;32m---> 20\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_config\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m config\n\u001b[0;32m 21\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_config\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m dates \u001b[38;5;66;03m# pyright: ignore[reportUnusedImport] # noqa: F401\u001b[39;00m\n\u001b[0;32m 22\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_config\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mconfig\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[0;32m 23\u001b[0m _global_config,\n\u001b[0;32m 24\u001b[0m describe_option,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 
29\u001b[0m set_option,\n\u001b[0;32m 30\u001b[0m )\n",
|
27 |
+
"File \u001b[1;32mc:\\gitProjects\\deneme\\.venv\\Lib\\site-packages\\pandas\\_config\\config.py:68\u001b[0m\n\u001b[0;32m 58\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtyping\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[0;32m 59\u001b[0m TYPE_CHECKING,\n\u001b[0;32m 60\u001b[0m Any,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 64\u001b[0m cast,\n\u001b[0;32m 65\u001b[0m )\n\u001b[0;32m 66\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mwarnings\u001b[39;00m\n\u001b[1;32m---> 68\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_typing\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[0;32m 69\u001b[0m F,\n\u001b[0;32m 70\u001b[0m T,\n\u001b[0;32m 71\u001b[0m )\n\u001b[0;32m 72\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutil\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_exceptions\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m find_stack_level\n\u001b[0;32m 74\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m TYPE_CHECKING:\n",
|
28 |
+
"File \u001b[1;32mc:\\gitProjects\\deneme\\.venv\\Lib\\site-packages\\pandas\\_typing.py:198\u001b[0m\n\u001b[0;32m 192\u001b[0m Frequency \u001b[38;5;241m=\u001b[39m Union[\u001b[38;5;28mstr\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mBaseOffset\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m 193\u001b[0m Axes \u001b[38;5;241m=\u001b[39m ListLike\n\u001b[0;32m 195\u001b[0m RandomState \u001b[38;5;241m=\u001b[39m Union[\n\u001b[0;32m 196\u001b[0m \u001b[38;5;28mint\u001b[39m,\n\u001b[0;32m 197\u001b[0m np\u001b[38;5;241m.\u001b[39mndarray,\n\u001b[1;32m--> 198\u001b[0m \u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrandom\u001b[49m\u001b[38;5;241m.\u001b[39mGenerator,\n\u001b[0;32m 199\u001b[0m np\u001b[38;5;241m.\u001b[39mrandom\u001b[38;5;241m.\u001b[39mBitGenerator,\n\u001b[0;32m 200\u001b[0m np\u001b[38;5;241m.\u001b[39mrandom\u001b[38;5;241m.\u001b[39mRandomState,\n\u001b[0;32m 201\u001b[0m ]\n\u001b[0;32m 203\u001b[0m \u001b[38;5;66;03m# dtypes\u001b[39;00m\n\u001b[0;32m 204\u001b[0m NpDtype \u001b[38;5;241m=\u001b[39m Union[\u001b[38;5;28mstr\u001b[39m, np\u001b[38;5;241m.\u001b[39mdtype, type_t[Union[\u001b[38;5;28mstr\u001b[39m, \u001b[38;5;28mcomplex\u001b[39m, \u001b[38;5;28mbool\u001b[39m, \u001b[38;5;28mobject\u001b[39m]]]\n",
|
29 |
+
"File \u001b[1;32mc:\\gitProjects\\deneme\\.venv\\Lib\\site-packages\\numpy\\__init__.py:351\u001b[0m, in \u001b[0;36m__getattr__\u001b[1;34m(attr)\u001b[0m\n\u001b[0;32m 350\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__dir__\u001b[39m():\n\u001b[1;32m--> 351\u001b[0m public_symbols \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mglobals\u001b[39m()\u001b[38;5;241m.\u001b[39mkeys() \u001b[38;5;241m|\u001b[39m {\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtesting\u001b[39m\u001b[38;5;124m'\u001b[39m}\n\u001b[0;32m 352\u001b[0m public_symbols \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m=\u001b[39m {\n\u001b[0;32m 353\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcore\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmatrixlib\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 354\u001b[0m \u001b[38;5;66;03m# These were moved in 1.25 and may be deprecated eventually:\u001b[39;00m\n\u001b[0;32m 355\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mModuleDeprecationWarning\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mVisibleDeprecationWarning\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 356\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mComplexWarning\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTooHardError\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAxisError\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 357\u001b[0m }\n\u001b[0;32m 358\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mlist\u001b[39m(public_symbols)\n",
|
30 |
+
"File \u001b[1;32mc:\\gitProjects\\deneme\\.venv\\Lib\\site-packages\\numpy\\random\\__init__.py:180\u001b[0m\n\u001b[0;32m 126\u001b[0m __all__ \u001b[38;5;241m=\u001b[39m [\n\u001b[0;32m 127\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mbeta\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[0;32m 128\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mbinomial\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 176\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mzipf\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[0;32m 177\u001b[0m ]\n\u001b[0;32m 179\u001b[0m \u001b[38;5;66;03m# add these for module-freeze analysis (like PyInstaller)\u001b[39;00m\n\u001b[1;32m--> 180\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m _pickle\n\u001b[0;32m 181\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m _common\n\u001b[0;32m 182\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m _bounded_integers\n",
|
31 |
+
"File \u001b[1;32mc:\\gitProjects\\deneme\\.venv\\Lib\\site-packages\\numpy\\random\\_pickle.py:1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmtrand\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m RandomState\n\u001b[0;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_philox\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Philox\n\u001b[0;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_pcg64\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m PCG64, PCG64DXSM\n",
|
32 |
+
"File \u001b[1;32mnumpy\\\\random\\\\mtrand.pyx:1\u001b[0m, in \u001b[0;36minit numpy.random.mtrand\u001b[1;34m()\u001b[0m\n",
|
33 |
+
"\u001b[1;31mValueError\u001b[0m: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject"
|
34 |
]
|
35 |
}
|
36 |
],
|
37 |
"source": [
|
|
|
38 |
"from datasets import load_dataset\n",
|
39 |
"import pandas as pd \n"
|
40 |
]
|
|
|
120 |
},
|
121 |
{
|
122 |
"cell_type": "code",
|
123 |
+
"execution_count": 2,
|
124 |
"metadata": {},
|
125 |
"outputs": [
|
126 |
{
|
|
|
130 |
"traceback": [
|
131 |
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
132 |
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
|
133 |
+
"Cell \u001b[1;32mIn[2], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;66;03m#train ve test dosyaları oluşturma \u001b[39;00m\n\u001b[1;32m----> 2\u001b[0m \u001b[43mtrain_df\u001b[49m\u001b[38;5;241m.\u001b[39mto_parquet(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mC:\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mgitProjects\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mdeneme\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124megitim\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mtrain_Egitim\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mmerged_train.parquet\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m 3\u001b[0m test_df\u001b[38;5;241m.\u001b[39mto_parquet(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mC:\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mgitProjects\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mdeneme\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mtest_Egitim\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mmerged_train.parquet\u001b[39m\u001b[38;5;124m'\u001b[39m)\n",
|
134 |
"\u001b[1;31mNameError\u001b[0m: name 'train_df' is not defined"
|
135 |
]
|
136 |
}
|
|
|
143 |
},
|
144 |
{
|
145 |
"cell_type": "code",
|
146 |
+
"execution_count": 3,
|
147 |
"metadata": {},
|
148 |
"outputs": [
|
149 |
{
|
|
|
235 |
},
|
236 |
{
|
237 |
"cell_type": "code",
|
238 |
+
"execution_count": 3,
|
239 |
"metadata": {},
|
240 |
"outputs": [
|
241 |
{
|
|
|
245 |
"traceback": [
|
246 |
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
247 |
"\u001b[1;31mOSError\u001b[0m Traceback (most recent call last)",
|
248 |
+
"Cell \u001b[1;32mIn[3], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m T5Tokenizer, T5ForConditionalGeneration\n\u001b[0;32m 3\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m T5Tokenizer\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mgoogle/flan-t5-xxl\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 4\u001b[0m model \u001b[38;5;241m=\u001b[39m T5ForConditionalGeneration\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mgoogle/flan-t5-xxl\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
|
249 |
"File \u001b[1;32mc:\\gitProjects\\deneme\\.venv\\Lib\\site-packages\\transformers\\__init__.py:26\u001b[0m\n\u001b[0;32m 23\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtyping\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m TYPE_CHECKING\n\u001b[0;32m 25\u001b[0m \u001b[38;5;66;03m# Check the dependencies satisfy the minimal versions required.\u001b[39;00m\n\u001b[1;32m---> 26\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m dependency_versions_check\n\u001b[0;32m 27\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[0;32m 28\u001b[0m OptionalDependencyNotAvailable,\n\u001b[0;32m 29\u001b[0m _LazyModule,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 48\u001b[0m logging,\n\u001b[0;32m 49\u001b[0m )\n\u001b[0;32m 52\u001b[0m logger \u001b[38;5;241m=\u001b[39m logging\u001b[38;5;241m.\u001b[39mget_logger(\u001b[38;5;18m__name__\u001b[39m) \u001b[38;5;66;03m# pylint: disable=invalid-name\u001b[39;00m\n",
|
250 |
"File \u001b[1;32mc:\\gitProjects\\deneme\\.venv\\Lib\\site-packages\\transformers\\dependency_versions_check.py:16\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;66;03m# Copyright 2020 The HuggingFace Team. All rights reserved.\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;66;03m#\u001b[39;00m\n\u001b[0;32m 3\u001b[0m \u001b[38;5;66;03m# Licensed under the Apache License, Version 2.0 (the \"License\");\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 12\u001b[0m \u001b[38;5;66;03m# See the License for the specific language governing permissions and\u001b[39;00m\n\u001b[0;32m 13\u001b[0m \u001b[38;5;66;03m# limitations under the License.\u001b[39;00m\n\u001b[0;32m 15\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdependency_versions_table\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m deps\n\u001b[1;32m---> 16\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mversions\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m require_version, require_version_core\n\u001b[0;32m 19\u001b[0m \u001b[38;5;66;03m# define which module versions we always want to check at run time\u001b[39;00m\n\u001b[0;32m 20\u001b[0m \u001b[38;5;66;03m# (usually the ones defined in `install_requires` in setup.py)\u001b[39;00m\n\u001b[0;32m 21\u001b[0m \u001b[38;5;66;03m#\u001b[39;00m\n\u001b[0;32m 22\u001b[0m \u001b[38;5;66;03m# order specific notes:\u001b[39;00m\n\u001b[0;32m 23\u001b[0m \u001b[38;5;66;03m# - tqdm must be checked before tokenizers\u001b[39;00m\n\u001b[0;32m 25\u001b[0m pkgs_to_check_at_runtime \u001b[38;5;241m=\u001b[39m [\n\u001b[0;32m 26\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpython\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 27\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtqdm\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 37\u001b[0m 
\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpyyaml\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 38\u001b[0m ]\n",
|
251 |
"File \u001b[1;32mc:\\gitProjects\\deneme\\.venv\\Lib\\site-packages\\transformers\\utils\\__init__.py:34\u001b[0m\n\u001b[0;32m 25\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mconstants\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, IMAGENET_STANDARD_MEAN, IMAGENET_STANDARD_STD\n\u001b[0;32m 26\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdoc\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[0;32m 27\u001b[0m add_code_sample_docstrings,\n\u001b[0;32m 28\u001b[0m add_end_docstrings,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 32\u001b[0m replace_return_docstrings,\n\u001b[0;32m 33\u001b[0m )\n\u001b[1;32m---> 34\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mgeneric\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[0;32m 35\u001b[0m ContextManagers,\n\u001b[0;32m 36\u001b[0m ExplicitEnum,\n\u001b[0;32m 37\u001b[0m ModelOutput,\n\u001b[0;32m 38\u001b[0m PaddingStrategy,\n\u001b[0;32m 39\u001b[0m TensorType,\n\u001b[0;32m 40\u001b[0m add_model_info_to_auto_map,\n\u001b[0;32m 41\u001b[0m add_model_info_to_custom_pipelines,\n\u001b[0;32m 42\u001b[0m cached_property,\n\u001b[0;32m 43\u001b[0m can_return_loss,\n\u001b[0;32m 44\u001b[0m expand_dims,\n\u001b[0;32m 45\u001b[0m filter_out_non_signature_kwargs,\n\u001b[0;32m 46\u001b[0m find_labels,\n\u001b[0;32m 47\u001b[0m flatten_dict,\n\u001b[0;32m 48\u001b[0m infer_framework,\n\u001b[0;32m 49\u001b[0m is_jax_tensor,\n\u001b[0;32m 50\u001b[0m is_numpy_array,\n\u001b[0;32m 51\u001b[0m is_tensor,\n\u001b[0;32m 52\u001b[0m is_tf_symbolic_tensor,\n\u001b[0;32m 53\u001b[0m is_tf_tensor,\n\u001b[0;32m 54\u001b[0m is_torch_device,\n\u001b[0;32m 55\u001b[0m is_torch_dtype,\n\u001b[0;32m 56\u001b[0m is_torch_tensor,\n\u001b[0;32m 57\u001b[0m reshape,\n\u001b[0;32m 58\u001b[0m squeeze,\n\u001b[0;32m 59\u001b[0m 
strtobool,\n\u001b[0;32m 60\u001b[0m tensor_size,\n\u001b[0;32m 61\u001b[0m to_numpy,\n\u001b[0;32m 62\u001b[0m to_py_obj,\n\u001b[0;32m 63\u001b[0m torch_float,\n\u001b[0;32m 64\u001b[0m torch_int,\n\u001b[0;32m 65\u001b[0m transpose,\n\u001b[0;32m 66\u001b[0m working_or_temp_dir,\n\u001b[0;32m 67\u001b[0m )\n\u001b[0;32m 68\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mhub\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[0;32m 69\u001b[0m CLOUDFRONT_DISTRIB_PREFIX,\n\u001b[0;32m 70\u001b[0m HF_MODULES_CACHE,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 96\u001b[0m try_to_load_from_cache,\n\u001b[0;32m 97\u001b[0m )\n\u001b[0;32m 98\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mimport_utils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[0;32m 99\u001b[0m ACCELERATE_MIN_VERSION,\n\u001b[0;32m 100\u001b[0m ENV_VARS_TRUE_AND_AUTO_VALUES,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 219\u001b[0m torch_only_method,\n\u001b[0;32m 220\u001b[0m )\n",
|
|
|
256 |
}
|
257 |
],
|
258 |
"source": [
|
259 |
+
"from transformers import T5Tokenizer, T5ForConditionalGeneration\n",
|
260 |
+
"\n",
|
261 |
+
"tokenizer = T5Tokenizer.from_pretrained(\"google/flan-t5-xxl\")\n",
|
262 |
+
"model = T5ForConditionalGeneration.from_pretrained(\"google/flan-t5-xxl\")"
|
263 |
]
|
264 |
},
|
265 |
{
|
|
|
270 |
]
|
271 |
},
|
272 |
{
|
273 |
+
"cell_type": "code",
|
274 |
+
"execution_count": null,
|
275 |
"metadata": {},
|
276 |
+
"outputs": [],
|
277 |
+
"source": []
|
|
|
278 |
},
|
279 |
{
|
280 |
"cell_type": "code",
|
|
|
324 |
]
|
325 |
},
|
326 |
{
|
327 |
+
"cell_type": "code",
|
328 |
+
"execution_count": null,
|
329 |
"metadata": {},
|
330 |
+
"outputs": [],
|
331 |
+
"source": []
|
|
|
332 |
},
|
333 |
{
|
334 |
"cell_type": "code",
|
|
|
377 |
"source_collection = dataset_read()"
|
378 |
]
|
379 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
380 |
{
|
381 |
"cell_type": "code",
|
382 |
+
"execution_count": 1,
|
383 |
"metadata": {},
|
384 |
+
"outputs": [
|
385 |
+
{
|
386 |
+
"name": "stderr",
|
387 |
+
"output_type": "stream",
|
388 |
+
"text": [
|
389 |
+
"c:\\gitProjects\\deneme\\.venv\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
390 |
+
" from .autonotebook import tqdm as notebook_tqdm\n"
|
391 |
+
]
|
392 |
+
},
|
393 |
+
{
|
394 |
+
"ename": "OSError",
|
395 |
+
"evalue": "[WinError 126] Belirtilen modül bulunamadı. Error loading \"c:\\gitProjects\\deneme\\.venv\\Lib\\site-packages\\torch\\lib\\fbgemm.dll\" or one of its dependencies.",
|
396 |
+
"output_type": "error",
|
397 |
+
"traceback": [
|
398 |
+
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
399 |
+
"\u001b[1;31mOSError\u001b[0m Traceback (most recent call last)",
|
400 |
+
"Cell \u001b[1;32mIn[1], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;66;03m# uygulama için kullanılcak olan özelliklerin tanımlanması\u001b[39;00m\n\u001b[1;32m----> 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m BertTokenizer,BertForQuestionAnswering,BertConfig\n\u001b[0;32m 3\u001b[0m \u001b[38;5;28;01mclass\u001b[39;00m \u001b[38;5;21;01mQA\u001b[39;00m:\n\u001b[0;32m 4\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m,model_path: \u001b[38;5;28mstr\u001b[39m):\n",
|
401 |
+
"File \u001b[1;32mc:\\gitProjects\\deneme\\.venv\\Lib\\site-packages\\transformers\\__init__.py:26\u001b[0m\n\u001b[0;32m 23\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtyping\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m TYPE_CHECKING\n\u001b[0;32m 25\u001b[0m \u001b[38;5;66;03m# Check the dependencies satisfy the minimal versions required.\u001b[39;00m\n\u001b[1;32m---> 26\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m dependency_versions_check\n\u001b[0;32m 27\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[0;32m 28\u001b[0m OptionalDependencyNotAvailable,\n\u001b[0;32m 29\u001b[0m _LazyModule,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 48\u001b[0m logging,\n\u001b[0;32m 49\u001b[0m )\n\u001b[0;32m 52\u001b[0m logger \u001b[38;5;241m=\u001b[39m logging\u001b[38;5;241m.\u001b[39mget_logger(\u001b[38;5;18m__name__\u001b[39m) \u001b[38;5;66;03m# pylint: disable=invalid-name\u001b[39;00m\n",
|
402 |
+
"File \u001b[1;32mc:\\gitProjects\\deneme\\.venv\\Lib\\site-packages\\transformers\\dependency_versions_check.py:16\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;66;03m# Copyright 2020 The HuggingFace Team. All rights reserved.\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;66;03m#\u001b[39;00m\n\u001b[0;32m 3\u001b[0m \u001b[38;5;66;03m# Licensed under the Apache License, Version 2.0 (the \"License\");\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 12\u001b[0m \u001b[38;5;66;03m# See the License for the specific language governing permissions and\u001b[39;00m\n\u001b[0;32m 13\u001b[0m \u001b[38;5;66;03m# limitations under the License.\u001b[39;00m\n\u001b[0;32m 15\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdependency_versions_table\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m deps\n\u001b[1;32m---> 16\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mversions\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m require_version, require_version_core\n\u001b[0;32m 19\u001b[0m \u001b[38;5;66;03m# define which module versions we always want to check at run time\u001b[39;00m\n\u001b[0;32m 20\u001b[0m \u001b[38;5;66;03m# (usually the ones defined in `install_requires` in setup.py)\u001b[39;00m\n\u001b[0;32m 21\u001b[0m \u001b[38;5;66;03m#\u001b[39;00m\n\u001b[0;32m 22\u001b[0m \u001b[38;5;66;03m# order specific notes:\u001b[39;00m\n\u001b[0;32m 23\u001b[0m \u001b[38;5;66;03m# - tqdm must be checked before tokenizers\u001b[39;00m\n\u001b[0;32m 25\u001b[0m pkgs_to_check_at_runtime \u001b[38;5;241m=\u001b[39m [\n\u001b[0;32m 26\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpython\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 27\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtqdm\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 37\u001b[0m 
\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpyyaml\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 38\u001b[0m ]\n",
|
403 |
+
"File \u001b[1;32mc:\\gitProjects\\deneme\\.venv\\Lib\\site-packages\\transformers\\utils\\__init__.py:34\u001b[0m\n\u001b[0;32m 25\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mconstants\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, IMAGENET_STANDARD_MEAN, IMAGENET_STANDARD_STD\n\u001b[0;32m 26\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdoc\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[0;32m 27\u001b[0m add_code_sample_docstrings,\n\u001b[0;32m 28\u001b[0m add_end_docstrings,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 32\u001b[0m replace_return_docstrings,\n\u001b[0;32m 33\u001b[0m )\n\u001b[1;32m---> 34\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mgeneric\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[0;32m 35\u001b[0m ContextManagers,\n\u001b[0;32m 36\u001b[0m ExplicitEnum,\n\u001b[0;32m 37\u001b[0m ModelOutput,\n\u001b[0;32m 38\u001b[0m PaddingStrategy,\n\u001b[0;32m 39\u001b[0m TensorType,\n\u001b[0;32m 40\u001b[0m add_model_info_to_auto_map,\n\u001b[0;32m 41\u001b[0m add_model_info_to_custom_pipelines,\n\u001b[0;32m 42\u001b[0m cached_property,\n\u001b[0;32m 43\u001b[0m can_return_loss,\n\u001b[0;32m 44\u001b[0m expand_dims,\n\u001b[0;32m 45\u001b[0m filter_out_non_signature_kwargs,\n\u001b[0;32m 46\u001b[0m find_labels,\n\u001b[0;32m 47\u001b[0m flatten_dict,\n\u001b[0;32m 48\u001b[0m infer_framework,\n\u001b[0;32m 49\u001b[0m is_jax_tensor,\n\u001b[0;32m 50\u001b[0m is_numpy_array,\n\u001b[0;32m 51\u001b[0m is_tensor,\n\u001b[0;32m 52\u001b[0m is_tf_symbolic_tensor,\n\u001b[0;32m 53\u001b[0m is_tf_tensor,\n\u001b[0;32m 54\u001b[0m is_torch_device,\n\u001b[0;32m 55\u001b[0m is_torch_dtype,\n\u001b[0;32m 56\u001b[0m is_torch_tensor,\n\u001b[0;32m 57\u001b[0m reshape,\n\u001b[0;32m 58\u001b[0m squeeze,\n\u001b[0;32m 59\u001b[0m 
strtobool,\n\u001b[0;32m 60\u001b[0m tensor_size,\n\u001b[0;32m 61\u001b[0m to_numpy,\n\u001b[0;32m 62\u001b[0m to_py_obj,\n\u001b[0;32m 63\u001b[0m torch_float,\n\u001b[0;32m 64\u001b[0m torch_int,\n\u001b[0;32m 65\u001b[0m transpose,\n\u001b[0;32m 66\u001b[0m working_or_temp_dir,\n\u001b[0;32m 67\u001b[0m )\n\u001b[0;32m 68\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mhub\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[0;32m 69\u001b[0m CLOUDFRONT_DISTRIB_PREFIX,\n\u001b[0;32m 70\u001b[0m HF_MODULES_CACHE,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 96\u001b[0m try_to_load_from_cache,\n\u001b[0;32m 97\u001b[0m )\n\u001b[0;32m 98\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mimport_utils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[0;32m 99\u001b[0m ACCELERATE_MIN_VERSION,\n\u001b[0;32m 100\u001b[0m ENV_VARS_TRUE_AND_AUTO_VALUES,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 219\u001b[0m torch_only_method,\n\u001b[0;32m 220\u001b[0m )\n",
|
404 |
+
"File \u001b[1;32mc:\\gitProjects\\deneme\\.venv\\Lib\\site-packages\\transformers\\utils\\generic.py:462\u001b[0m\n\u001b[0;32m 458\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mtuple\u001b[39m(\u001b[38;5;28mself\u001b[39m[k] \u001b[38;5;28;01mfor\u001b[39;00m k \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mkeys())\n\u001b[0;32m 461\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_torch_available():\n\u001b[1;32m--> 462\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_pytree\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01m_torch_pytree\u001b[39;00m\n\u001b[0;32m 464\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_model_output_flatten\u001b[39m(output: ModelOutput) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Tuple[List[Any], \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_torch_pytree.Context\u001b[39m\u001b[38;5;124m\"\u001b[39m]:\n\u001b[0;32m 465\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mlist\u001b[39m(output\u001b[38;5;241m.\u001b[39mvalues()), \u001b[38;5;28mlist\u001b[39m(output\u001b[38;5;241m.\u001b[39mkeys())\n",
|
405 |
+
"File \u001b[1;32mc:\\gitProjects\\deneme\\.venv\\Lib\\site-packages\\torch\\__init__.py:148\u001b[0m\n\u001b[0;32m 146\u001b[0m err \u001b[38;5;241m=\u001b[39m ctypes\u001b[38;5;241m.\u001b[39mWinError(ctypes\u001b[38;5;241m.\u001b[39mget_last_error())\n\u001b[0;32m 147\u001b[0m err\u001b[38;5;241m.\u001b[39mstrerror \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m Error loading \u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdll\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m or one of its dependencies.\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m--> 148\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m err\n\u001b[0;32m 150\u001b[0m kernel32\u001b[38;5;241m.\u001b[39mSetErrorMode(prev_error_mode)\n\u001b[0;32m 153\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_preload_cuda_deps\u001b[39m(lib_folder, lib_name):\n",
|
406 |
+
"\u001b[1;31mOSError\u001b[0m: [WinError 126] Belirtilen modül bulunamadı. Error loading \"c:\\gitProjects\\deneme\\.venv\\Lib\\site-packages\\torch\\lib\\fbgemm.dll\" or one of its dependencies."
|
407 |
+
]
|
408 |
+
}
|
409 |
+
],
|
410 |
"source": [
|
411 |
"# uygulama için kullanılacak olan özelliklerin tanımlanması\n",
|
412 |
"from transformers import BertTokenizer,BertForQuestionAnswering,BertConfig\n",
|
413 |
"class QA:\n",
|
414 |
" def __init__(self,model_path: str):\n",
|
415 |
+
" self.max_seq_length = 384 #max seq\n",
|
416 |
" self.doc_stride = 128 #stride \n",
|
417 |
" self.do_lower_case = False\n",
|
418 |
" self.max_query_length = 30\n",
|
|
|
431 |
" \n",
|
432 |
" # This function is used to load the model\n",
|
433 |
" def load_model(self,model_path: str,do_lower_case=False):\n",
|
434 |
+
" config = BertConfig.from_pretrained(model_path + \"C:\\\\gitProjects\\\\deneme\\\\egitim\\\\train_Egitim\")\n",
|
435 |
" tokenizer = BertTokenizer.from_pretrained(model_path, do_lower_case=do_lower_case)\n",
|
436 |
" model = BertForQuestionAnswering.from_pretrained(model_path, from_tf=False, config=config)\n",
|
437 |
+
" return model, tokenizer\n"
|
438 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
439 |
}
|
440 |
],
|
441 |
"metadata": {
|
model.py
ADDED
@@ -0,0 +1,179 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from datasets import load_dataset
|
2 |
+
import pandas as pd
|
3 |
+
import torch
|
4 |
+
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
|
5 |
+
from transformers import BertTokenizer, BertForQuestionAnswering, BertConfig
|
6 |
+
from pymongo import MongoClient
|
7 |
+
|
8 |
+
class Database:
    """Static accessors for the MongoDB collection holding the
    prompt/response training data."""

    @staticmethod
    def get_mongodb():
        """Return the MongoDB settings as
        (connection_url, database_name, collection_name)."""
        return 'mongodb://localhost:27017/', 'yeniDatabase', 'train'

    @staticmethod
    def _get_collection():
        # Single place for the connect boilerplate that was previously
        # copy-pasted into every query method. Returns the client too so
        # callers can close it (the original leaked one client per call).
        mongo_url, db_name, collection_name = Database.get_mongodb()
        client = MongoClient(mongo_url)
        return client, client[db_name][collection_name]

    @staticmethod
    def get_input_texts():
        """Fetch every document that has a "Prompt" field.

        Returns:
            list[dict]: documents shaped like {"Prompt": <text>}.
        """
        client, collection = Database._get_collection()
        try:
            query = {"Prompt": {"$exists": True}}
            # Materialize the cursor before the connection is closed.
            return list(collection.find(query, {"Prompt": 1, "_id": 0}))
        finally:
            client.close()  # don't leak a connection per call

    @staticmethod
    def get_output_texts():
        """Fetch every document that has a "Response" field.

        Returns:
            list[dict]: documents shaped like {"Response": <text>}.
        """
        client, collection = Database._get_collection()
        try:
            query = {"Response": {"$exists": True}}
            return list(collection.find(query, {"Response": 1, "_id": 0}))
        finally:
            client.close()

    @staticmethod
    def get_average_prompt_token_length():
        """Average of the "Prompt_token_length" field over all documents.

        Returns:
            int: truncated average, or 0 when no document has the field.
        """
        client, collection = Database._get_collection()
        try:
            total_length = 0
            count = 0
            for doc in collection.find({}, {'Prompt_token_length': 1}):
                if 'Prompt_token_length' in doc:
                    total_length += doc['Prompt_token_length']
                    count += 1
            # Same truncation as before: int(total/count), 0 on empty.
            return int(total_length / count) if count > 0 else 0
        finally:
            client.close()
|
68 |
+
|
69 |
+
# Load the tokenizer once at module import time; shared by the helper below.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')


def tokenize_and_encode(train_df=None, doc=None):
    """Pull the prompt/response pairs from MongoDB and batch-encode them.

    Args:
        train_df: unused; kept with a default for backward compatibility
            (the original signature required it but the module-level call
            below passes no arguments, which raised a TypeError).
        doc: unused; same backward-compatibility reason.

    Returns:
        The transformers BatchEncoding with input_ids / attention_mask /
        token_type_ids as PyTorch tensors (return_tensors='pt').
    """
    input_texts_from_db = Database.get_input_texts()
    output_texts_from_db = Database.get_output_texts()
    # Loop variables renamed so they no longer shadow the `doc` parameter.
    input_texts = [record["Prompt"] for record in input_texts_from_db]
    output_texts = [record["Response"] for record in output_texts_from_db]
    encoded = tokenizer.batch_encode_plus(
        input_texts,
        output_texts,
        padding=True,
        truncation=True,
        max_length=100,
        return_attention_mask=True,
        return_tensors='pt'
    )
    return encoded


encoded_data = tokenize_and_encode()
|
92 |
+
|
93 |
+
class QA:
    """Wraps a BERT question-answering checkpoint together with the
    feature-extraction / decoding hyper-parameters used at inference."""

    # NOTE(review): several of these constants were marked "to be changed"
    # in the original; confirm against the training configuration.
    def __init__(self, model_path: str):
        self.max_seq_length = 384          # max tokens per (question, context) pair
        self.doc_stride = 128              # overlap between sliding context windows
        self.do_lower_case = False
        self.max_query_length = 30
        self.n_best_size = 3               # candidate answers kept per question
        self.max_answer_length = 30
        self.version_2_with_negative = False
        # Fix: pass the configured casing through instead of silently
        # relying on load_model's default (they happened to agree, but
        # would drift if self.do_lower_case ever changed).
        self.model, self.tokenizer = self.load_model(model_path, self.do_lower_case)
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.model.to(self.device)
        self.model.eval()  # inference only

    def load_model(self, model_path: str, do_lower_case=False):
        """Load config, tokenizer and QA head from `model_path`.

        Returns:
            tuple: (BertForQuestionAnswering, BertTokenizer).
        """
        config = BertConfig.from_pretrained(model_path)
        tokenizer = BertTokenizer.from_pretrained(model_path, do_lower_case=do_lower_case)
        model = BertForQuestionAnswering.from_pretrained(model_path, from_tf=False, config=config)
        return model, tokenizer

    def extract_features_from_dataset(self, train_df):
        """Compute simple length statistics over the examples in `train_df`.

        NOTE(review): both values are max(len(...)) over the same iterable,
        so they are always equal here — presumably max_query_length should
        be measured on the questions only; confirm intent.
        """
        def get_max_length(examples):
            return {
                'max_seq_length': max(len(e) for e in examples),
                'max_query_length': max(len(q) for q in examples)
            }
        return get_max_length(train_df)
|
124 |
+
|
125 |
+
# Fetch and print the average prompt token length from the database.
average_length = Database.get_average_prompt_token_length()
print(f"Ortalama prompt token uzunluğu: {average_length}")

# Instantiate the QA wrapper (loads/downloads the checkpoint at import time).
qa = QA(model_path='bert-base-uncased')


# Integrating the tensor dataset into the code (kept disabled as a string literal).

"""# Tensor veri kümesi oluşturma
input_ids = encoded_data['input_ids']
attention_mask = encoded_data['attention_mask']
token_type_ids = encoded_data['token_type_ids']
labels = torch.tensor(data['Response'].tolist()) # Cevapları etiket olarak kullanın

# TensorDataset oluşturma
dataset = TensorDataset(input_ids, attention_mask, token_type_ids, labels)

# DataLoader oluşturma
batch_size = 16
dataloader = DataLoader(
dataset,
sampler=RandomSampler(dataset),
batch_size=batch_size
)"""

# Definition of the epoch count / training loop for the model (kept disabled).
"""# Eğitim için optimizer
optimizer = AdamW(model.parameters(), lr=5e-5)

# Eğitim döngüsü
model.train()
for epoch in range(3): # Örnek olarak 3 epoch
for batch in dataloader:
input_ids, attention_mask, token_type_ids, labels = [t.to(device) for t in batch]
optimizer.zero_grad()
outputs = model(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids, start_positions=labels, end_positions=labels)
loss = outputs.loss
loss.backward()
optimizer.step()
print(f"Epoch {epoch+1} loss: {loss.item()}")"""


# Classification of the results (kept disabled).
"""# Modeli değerlendirme aşamasına getirme
model.eval()

# Örnek tahmin
with torch.no_grad():
for batch in dataloader:
input_ids, attention_mask, token_type_ids, _ = [t.to(device) for t in batch]
outputs = model(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
# Çıktıları kullanarak başlık, alt başlık ve anahtar kelimeler belirleyebilirsiniz
"""
|
requirements.txt
CHANGED
@@ -2,5 +2,5 @@ gradio==4.40.0.*
|
|
2 |
pymongo==4.8.0.*
|
3 |
pandas==2.2.2.*
|
4 |
datasets==2.20.0.*
|
5 |
-
torch
|
6 |
transformers==4.43.4.*
|
|
|
2 |
pymongo==4.8.0.*
|
3 |
pandas==2.2.2.*
|
4 |
datasets==2.20.0.*
|
5 |
+
torch
|
6 |
transformers==4.43.4.*
|