PolyakovK committed on
Commit 8c730df · 1 Parent(s): a2a6e78

Added all files except large model files

anekdoty.txt ADDED
The diff for this file is too large to render. See raw diff
 
cached_lm_GPT2Tokenizer_32_train_dataset.txt ADDED
Binary file (825 kB). View file
 
cached_lm_GPT2Tokenizer_32_train_dataset.txt.lock ADDED
File without changes
finetuned/config.json ADDED
@@ -0,0 +1,41 @@
+ {
+   "_name_or_path": "sberbank-ai/rugpt3small_based_on_gpt2",
+   "activation_function": "gelu_new",
+   "architectures": [
+     "GPT2LMHeadModel"
+   ],
+   "attn_pdrop": 0.1,
+   "bos_token_id": 1,
+   "embd_pdrop": 0.1,
+   "eos_token_id": 2,
+   "gradient_checkpointing": false,
+   "id2label": {
+     "0": "LABEL_0"
+   },
+   "initializer_range": 0.02,
+   "label2id": {
+     "LABEL_0": 0
+   },
+   "layer_norm_epsilon": 1e-05,
+   "model_type": "gpt2",
+   "n_ctx": 2048,
+   "n_embd": 768,
+   "n_head": 12,
+   "n_inner": null,
+   "n_layer": 12,
+   "n_positions": 2048,
+   "pad_token_id": 0,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.44.0",
+   "use_cache": true,
+   "vocab_size": 50264
+ }
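
The config above pins the base checkpoint (sberbank-ai/rugpt3small_based_on_gpt2) and its architecture: 12 layers, 12 heads, 768-dim embeddings, and a 2048-token context. A minimal sketch of reading it back with transformers, assuming the repository is cloned locally with the `finetuned/` directory in the working directory:

    from transformers import AutoConfig, GPT2LMHeadModel

    # Inspect the saved hyperparameters (n_layer=12, n_embd=768, n_ctx=2048, ...)
    config = AutoConfig.from_pretrained("finetuned")
    print(config.n_layer, config.n_head, config.n_ctx)

    # Weights are then restored from finetuned/model.safetensors against this config
    model = GPT2LMHeadModel.from_pretrained("finetuned")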
finetuned/generation_config.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "pad_token_id": 0,
+   "transformers_version": "4.44.0"
+ }
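
These are the generation defaults (bos_token_id=1, eos_token_id=2, pad_token_id=0) that model.generate() falls back to when no explicit values are passed. A short sketch of inspecting them, assuming the same local `finetuned/` directory:

    from transformers import GenerationConfig

    gen_config = GenerationConfig.from_pretrained("finetuned")
    # Should print 1 2 0, matching the JSON above
    print(gen_config.bos_token_id, gen_config.eos_token_id, gen_config.pad_token_id)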
finetuned/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
finetuned/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:835959117be04e902bc01acc4bf9d85c8ceffd3bc8db4eed27312235c7355c22
+ size 500941440
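
Only a Git LFS pointer is committed here: the spec version, the sha256 of the real weights, and their size (~500 MB); the weights themselves live in LFS storage. A hedged sketch for checking whether a local copy is the real file or still the pointer, using the size and oid from the pointer above:

    import hashlib
    import os

    path = "finetuned/model.safetensors"
    expected_size = 500941440
    expected_sha256 = "835959117be04e902bc01acc4bf9d85c8ceffd3bc8db4eed27312235c7355c22"

    # A pointer file is only a few hundred bytes; the real weights are ~500 MB.
    if os.path.getsize(path) != expected_size:
        print("Looks like an LFS pointer - run `git lfs pull` to fetch the weights.")
    else:
        h = hashlib.sha256()
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(1 << 20), b""):
                h.update(chunk)
        print("sha256 matches pointer:", h.hexdigest() == expected_sha256)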
finetuned/runs/Aug08_16-55-33_polyakovk/events.out.tfevents.1723125335.polyakovk.25105.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cecffac6e122a4a480909d8de92471a3242633d8908bda02951d4fa74477b0f1
+ size 5520
finetuned/special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "mask_token": {
+     "content": "<mask>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<pad>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
finetuned/tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
+ {
+   "add_bos_token": false,
+   "add_prefix_space": false,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<pad>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "3": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "4": {
+       "content": "<mask>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<s>",
+   "clean_up_tokenization_spaces": true,
+   "eos_token": "</s>",
+   "errors": "replace",
+   "mask_token": "<mask>",
+   "model_max_length": 2048,
+   "pad_token": "<pad>",
+   "padding_side": "left",
+   "tokenizer_class": "GPT2Tokenizer",
+   "truncation_side": "left",
+   "trust_remote_code": false,
+   "unk_token": "<unk>"
+ }
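
Two settings here matter for batched generation: padding_side and truncation_side are both "left", so padding is prepended and over-long prompts lose their oldest tokens, which is what a decoder-only model like GPT-2 needs when the continuation is appended on the right. A small sketch, assuming the tokenizer is loaded from the local `finetuned/` directory (the prompts are the ones used in the notebook below):

    from transformers import GPT2Tokenizer

    tok = GPT2Tokenizer.from_pretrained("finetuned")
    # With padding_side="left" the <pad> tokens go in front, keeping the prompt
    # adjacent to the tokens the model will generate next.
    batch = tok(["Шла Саша по шоссе", "Народная мудрость гласит"],
                return_tensors="pt", padding=True)
    print(batch["input_ids"].shape)
    print(tok.special_tokens_map)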
finetuned/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
gpt.ipynb ADDED
@@ -0,0 +1,333 @@
+ {
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from transformers import GPT2LMHeadModel, GPT2Tokenizer\n",
+ "import torch\n",
+ "DEVICE = torch.device(\"cuda:0\")\n",
+ "\n",
+ "model_name_or_path = \"sberbank-ai/rugpt3small_based_on_gpt2\"\n",
+ "tokenizer = GPT2Tokenizer.from_pretrained(model_name_or_path)\n",
+ "model = GPT2LMHeadModel.from_pretrained(model_name_or_path).to(DEVICE)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "with open('anekdoty.txt', 'r', encoding='utf-8') as file:\n",
+ "    text = file.read()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/home/polyakovk/venv_linux/lib/python3.11/site-packages/transformers/data/datasets/language_modeling.py:53: FutureWarning: This dataset will be removed from the library soon, preprocessing should be handled with the 🤗 Datasets library. You can have a look at this example script for pointers: https://github.com/huggingface/transformers/blob/main/examples/pytorch/language-modeling/run_mlm.py\n",
+ "  warnings.warn(\n"
+ ]
+ }
+ ],
+ "source": [
+ "from transformers import TextDataset, DataCollatorForLanguageModeling\n",
+ "\n",
+ "# Save the training data to a .txt file\n",
+ "train_path = 'train_dataset.txt'\n",
+ "with open(train_path, \"w\") as f:\n",
+ "    f.write(text)\n",
+ "\n",
+ "# Create the dataset\n",
+ "train_dataset = TextDataset(tokenizer=tokenizer, file_path=train_path, block_size=32)\n",
+ "\n",
+ "# Create the data collator (slices the text into chunks of suitable length)\n",
+ "data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from transformers import Trainer, TrainingArguments\n",
+ "\n",
+ "training_args = TrainingArguments(\n",
+ "    output_dir=\"./finetuned\",\n",
+ "    overwrite_output_dir=True,\n",
+ "    num_train_epochs=30,\n",
+ "    per_device_train_batch_size=32,\n",
+ "    per_device_eval_batch_size=16,\n",
+ "    warmup_steps=10,\n",
+ "    gradient_accumulation_steps=32,\n",
+ ")\n",
+ "\n",
+ "trainer = Trainer(\n",
+ "    model=model,\n",
+ "    args=training_args,\n",
+ "    data_collator=data_collator,\n",
+ "    train_dataset=train_dataset,\n",
+ "    optimizers=(torch.optim.AdamW(model.parameters(), lr=0.001), None)\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " <div>\n",
+ " \n",
+ " <progress value='240' max='240' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+ " [240/240 1:14:57, Epoch 27/30]\n",
+ " </div>\n",
+ " <table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: left;\">\n",
+ " <th>Step</th>\n",
+ " <th>Training Loss</th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " </tbody>\n",
+ "</table><p>"
+ ],
+ "text/plain": [
+ "<IPython.core.display.HTML object>"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ "TrainOutput(global_step=240, training_loss=0.9343911488850911, metrics={'train_runtime': 4515.8084, 'train_samples_per_second': 58.428, 'train_steps_per_second': 0.053, 'total_flos': 4011240960000000.0, 'train_loss': 0.9343911488850911, 'epoch': 27.927272727272726})"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "trainer.train()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "model_path = \"finetuned\"\n",
+ "tokenizer = GPT2Tokenizer.from_pretrained(model_path)\n",
+ "model = GPT2LMHeadModel.from_pretrained(model_path).to(DEVICE)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 70,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def generate_jokes(prompt, temperature, top_p, max_length, num_return_sequences):\n",
+ "    input_ids = tokenizer.encode(prompt, return_tensors='pt').to(DEVICE)\n",
+ "\n",
+ "    # Generate several jokes\n",
+ "    outputs = model.generate(\n",
+ "        input_ids=input_ids,\n",
+ "        do_sample=True,\n",
+ "        # num_beams=5,\n",
+ "        temperature=temperature,\n",
+ "        top_p=top_p,\n",
+ "        max_length=max_length,\n",
+ "        num_return_sequences=num_return_sequences\n",
+ "    )\n",
+ "\n",
+ "    # Process all generated jokes\n",
+ "    jokes = []\n",
+ "    for output in outputs:\n",
+ "        generated_text = tokenizer.decode(output, skip_special_tokens=True)\n",
+ "        # Trim the text after the first full stop\n",
+ "        if '…' in generated_text:\n",
+ "            generated_text = generated_text.split('…')[0] + '.'\n",
+ "        elif '.' in generated_text:\n",
+ "            generated_text = generated_text.split('.')[0] + '.'\n",
+ "        elif '!' in generated_text:\n",
+ "            generated_text = generated_text.split('!')[0] + '.'\n",
+ "        jokes.append(generated_text)\n",
+ "\n",
+ "    return jokes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 73,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['Шла Саша по шоссе, громко разговаривая с шофером.', 'Шла Саша по шоссе, громко матерясь и упирая руку в ширинку.', 'Шла Саша по шоссе, несла пургу и, как раз, дождь.', 'Шла Саша по шоссе, но не за трактором.']\n"
+ ]
+ }
+ ],
+ "source": [
+ "text = \"Шла Саша по шоссе\"\n",
+ "print(generate_jokes(text, 1, 0.9, 30, 4))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "однажды я проваливал экзамен по истории.\n",
+ "— Вино с возрастом становится лучше. Я становлюсь лучше с вином…\n",
+ "— Сними\n"
+ ]
+ }
+ ],
+ "source": [
+ "text = \"однажды я пришел из школы\"\n",
+ "input_ids = tokenizer.encode(text, return_tensors=\"pt\").to(DEVICE)\n",
+ "model.eval()\n",
+ "with torch.no_grad():\n",
+ "    out = model.generate(input_ids,\n",
+ "        do_sample=True,\n",
+ "        num_beams=2,\n",
+ "        temperature=1.5,\n",
+ "        top_p=0.9,\n",
+ "        max_length=30,\n",
+ "\n",
+ "    )\n",
+ "\n",
+ "generated_text = list(map(tokenizer.decode, out))[0]\n",
+ "print()\n",
+ "print(generated_text)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# model.save_pretrained('./finetuned')\n",
+ "# tokenizer.save_pretrained('./finetuned')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# import requests\n",
+ "# from bs4 import BeautifulSoup\n",
+ "# import re\n",
+ "\n",
+ "# # Function for fetching jokes from a single page\n",
+ "# def get_jokes_from_page(url):\n",
+ "#     response = requests.get(url, headers=headers)\n",
+ "#     response.raise_for_status()  # Check for request errors\n",
+ "\n",
+ "#     soup = BeautifulSoup(response.text, 'html.parser')\n",
+ "\n",
+ "#     # Find all jokes on the page\n",
+ "#     jokes = soup.find_all('div', class_='anekdot-text')  # Replace the selector with the correct one\n",
+ "\n",
+ "#     page_jokes = []\n",
+ "#     for joke in jokes:\n",
+ "#         # Extract the joke text\n",
+ "#         joke_text = joke.get_text(strip=True)\n",
+ "\n",
+ "#         # Remove digits and symbols at the end of the text\n",
+ "#         joke_text_cleaned = re.sub(r'\\d+[\\#\\d]*$', '', joke_text).strip()\n",
+ "\n",
+ "#         # Append the cleaned text to the list\n",
+ "#         page_jokes.append(joke_text_cleaned)\n",
+ "\n",
+ "#     return page_jokes\n",
+ "\n",
+ "# # URL template for the pages\n",
+ "# base_url = \"https://anekdotovstreet.com/korotkie-anekdoty/{}/\"\n",
+ "\n",
+ "# # Headers to imitate a browser\n",
+ "# headers = {\n",
+ "#     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'\n",
+ "# }\n",
+ "\n",
+ "# # Open the file for writing the jokes\n",
+ "# with open('anekdoty.txt', 'w', encoding='utf-8') as file:\n",
+ "#     for page_number in range(2, 400):\n",
+ "#         # Build the URL for the current page\n",
+ "#         url = base_url.format(page_number)\n",
+ "#         print(f\"Собираю шутки со страницы {page_number}...\")\n",
+ "\n",
+ "#         # Fetch the jokes from the current page\n",
+ "#         jokes = get_jokes_from_page(url)\n",
+ "\n",
+ "#         # If there are no jokes, the pages have run out (optional)\n",
+ "#         if not jokes:\n",
+ "#             print(f\"Шутки на странице {page_number} не найдены.\")\n",
+ "#             continue\n",
+ "\n",
+ "#         # Write the jokes to the file\n",
+ "#         for joke in jokes:\n",
+ "#             file.write(joke + '\\n')\n",
+ "\n",
+ "# print(\"Анекдоты успешно сохранены в файл 'anekdoty.txt'.\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "venv_linux",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.9"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+ }
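
The FutureWarning in the third cell notes that TextDataset is deprecated and that preprocessing should move to the 🤗 Datasets library. A rough equivalent of the block_size=32 chunking, sketched with datasets (not pinned in requirements.txt, so this is an assumption) and reusing the already-loaded tokenizer; the existing DataCollatorForLanguageModeling(mlm=False) would still supply the labels:

    from datasets import load_dataset

    block_size = 32  # same chunk length the notebook passes to TextDataset

    raw = load_dataset("text", data_files={"train": "train_dataset.txt"})

    def tokenize(batch):
        return tokenizer(batch["text"])

    def group_texts(examples):
        # Concatenate all token ids, then cut them into fixed-size blocks,
        # mirroring what TextDataset did with block_size=32.
        concatenated = sum(examples["input_ids"], [])
        total = (len(concatenated) // block_size) * block_size
        return {"input_ids": [concatenated[i:i + block_size]
                              for i in range(0, total, block_size)]}

    tokenized = raw["train"].map(tokenize, batched=True, remove_columns=["text"])
    lm_dataset = tokenized.map(group_texts, batched=True,
                               remove_columns=tokenized.column_names)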
gpt_jokes.py ADDED
@@ -0,0 +1,61 @@
+ import streamlit as st
+ from transformers import GPT2LMHeadModel, GPT2Tokenizer
+ import torch
+
+
+ DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+
+ # Load the fine-tuned model and tokenizer
+ model_path = "finetuned"
+ tokenizer = GPT2Tokenizer.from_pretrained(model_path)
+ model = GPT2LMHeadModel.from_pretrained(model_path).to(DEVICE)
+
+ def generate_jokes(prompt, temperature, top_p, max_length, num_return_sequences):
+     input_ids = tokenizer.encode(prompt, return_tensors='pt').to(DEVICE)
+
+     # Generate several jokes
+     outputs = model.generate(
+         input_ids=input_ids,
+         do_sample=True,
+         # num_beams=5,
+         temperature=temperature,
+         top_p=top_p,
+         max_length=max_length,
+         num_return_sequences=num_return_sequences
+     )
+
+     # Process all generated jokes
+     jokes = []
+     for output in outputs:
+         generated_text = tokenizer.decode(output, skip_special_tokens=True)
+         # Trim the text after the first full stop
+         if '…' in generated_text:
+             generated_text = generated_text.split('…')[0] + '.'
+         elif '.' in generated_text:
+             generated_text = generated_text.split('.')[0] + '.'
+         elif '!' in generated_text:
+             generated_text = generated_text.split('!')[0] + '.'
+         jokes.append(generated_text)
+
+     return jokes
+
+ # Build the Streamlit interface
+ st.title('GPT-2, как генератор сомнительных шуток')
+
+ # Prompt input
+ prompt = st.text_input('Введите свой промт:', 'Народная мудрость гласит')
+
+ # Generation parameter controls
+ max_length = st.slider('Максимальная длина последовательности:', min_value=10, max_value=100, value=35)
+ num_return_sequences = st.slider('Число генераций текста:', min_value=1, max_value=5, value=3)
+ temperature = st.slider('Температура (дисперсия):', min_value=0.1, max_value=2.0, value=1.0, step=0.1)
+ top_p = st.slider('Top-p (ядро):', min_value=0.1, max_value=1.0, value=0.9, step=0.1)
+
+ # Text generation
+ if st.button('Сгенерировать'):
+     with st.spinner('Генерация текста...'):
+         generated_texts = generate_jokes(prompt, temperature, top_p, max_length, num_return_sequences)
+         for i, text in enumerate(generated_texts):
+             st.subheader(f'Генерация {i + 1}:')
+             st.write(text)
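
One caveat with the Streamlit script: Streamlit re-runs the whole file on every widget interaction, so the model and tokenizer are reloaded from disk each time. A possible refinement (an assumption, not part of this commit) is to wrap the loading in st.cache_resource, which the pinned streamlit 1.37.0 supports:

    import streamlit as st
    import torch
    from transformers import GPT2LMHeadModel, GPT2Tokenizer

    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    @st.cache_resource  # load the weights once per server process, not on every rerun
    def load_model(model_path: str = "finetuned"):
        tokenizer = GPT2Tokenizer.from_pretrained(model_path)
        model = GPT2LMHeadModel.from_pretrained(model_path).to(DEVICE)
        return tokenizer, model

    tokenizer, model = load_model()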
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ streamlit==1.37.0
+ torch==2.4.0
+ transformers==4.44.0
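
A quick way to confirm that an environment actually matches these pins, sketched with the standard library only:

    import importlib.metadata as md

    for package in ("streamlit", "torch", "transformers"):
        # Raises PackageNotFoundError if the dependency is missing
        print(package, md.version(package))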
train_dataset.txt ADDED
The diff for this file is too large to render. See raw diff