sergey21000 committed
Commit
10230f7
1 Parent(s): c0d1971

Upload 2 files

Files changed (2)
  1. app.py +0 -1
  2. utils.py +495 -494
app.py CHANGED
@@ -319,7 +319,6 @@ with gr.Blocks(theme=theme, css=css) as interface:
          fn=load_llm_model,
          inputs=[curr_llm_model_repo, curr_llm_model_path],
          outputs=[llm_model, support_system_role, load_llm_model_log],
-         queue=True,
      ).success(
          fn=lambda log: log + get_memory_usage(),
          inputs=[load_llm_model_log],
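
For context, below is a minimal, self-contained sketch of the event chain this hunk edits. The stub functions, the "Load model" button, the component types (gr.State for llm_model and support_system_role, gr.Textbox for the log) and the outputs of the .success step are assumptions for illustration only; the listener wiring mirrors the lines shown above. With the app queue enabled, Gradio event listeners normally fall back to the queue by default, so dropping the explicit queue=True should not change behavior.

# Sketch only: stub functions and component types are assumptions,
# not taken from the actual app.py.
import gradio as gr

def load_llm_model(model_repo: str, model_file: str):
    # stand-in for utils.load_llm_model: returns model dict, system-role flag, log
    return {'model': None}, False, f'Loaded {model_file} from {model_repo}\n'

def get_memory_usage() -> str:
    return '--- memory stats ---'

with gr.Blocks() as interface:
    curr_llm_model_repo = gr.Textbox(label='HF model repo')
    curr_llm_model_path = gr.Textbox(label='GGUF model file')
    llm_model = gr.State({'model': None})
    support_system_role = gr.State(False)
    load_llm_model_log = gr.Textbox(label='Load log')
    load_btn = gr.Button('Load model')

    load_btn.click(
        fn=load_llm_model,
        inputs=[curr_llm_model_repo, curr_llm_model_path],
        outputs=[llm_model, support_system_role, load_llm_model_log],
        # queue=True removed by this commit; listeners are queued by default
        # once the app queue is enabled, so the explicit flag was redundant
    ).success(
        fn=lambda log: log + get_memory_usage(),
        inputs=[load_llm_model_log],
        outputs=[load_llm_model_log],   # assumed output; the hunk cuts off here
    )

if __name__ == '__main__':
    interface.queue().launch()
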
utils.py CHANGED
@@ -1,495 +1,496 @@
- import csv
- from pathlib import Path
- from shutil import rmtree
- from typing import List, Tuple, Dict, Union, Optional, Any, Iterable
- from tqdm import tqdm
-
- import psutil
- import requests
- from requests.exceptions import MissingSchema
-
- import torch
- import gradio as gr
-
- from llama_cpp import Llama
- from youtube_transcript_api import YouTubeTranscriptApi, NoTranscriptFound, TranscriptsDisabled
- from huggingface_hub import hf_hub_download, list_repo_tree, list_repo_files, repo_info, repo_exists, snapshot_download
-
- from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
- from langchain_community.vectorstores import FAISS
- from langchain_huggingface import HuggingFaceEmbeddings
-
- # imports for annotations
- from langchain.docstore.document import Document
- from langchain_core.embeddings import Embeddings
- from langchain_core.vectorstores import VectorStore
-
- from config import (
-     LLM_MODELS_PATH,
-     EMBED_MODELS_PATH,
-     GENERATE_KWARGS,
-     LOADER_CLASSES,
-     CONTEXT_TEMPLATE,
- )
-
-
- # type annotations
- CHAT_HISTORY = List[Tuple[Optional[str], Optional[str]]]
- LLM_MODEL_DICT = Dict[str, Llama]
- EMBED_MODEL_DICT = Dict[str, Embeddings]
-
-
- # ===================== ADDITIONAL FUNCS =======================
-
- # getting disk, CPU and GPU memory usage
- def get_memory_usage() -> str:
-     print_memory = ''
-
-     memory_type = 'Disk'
-     psutil_stats = psutil.disk_usage('.')
-     memory_total = psutil_stats.total / 1024**3
-     memory_usage = psutil_stats.used / 1024**3
-     print_memory += f'{memory_type} Memory Usage: {memory_usage:.2f} / {memory_total:.2f} GB\n'
-
-     memory_type = 'CPU'
-     psutil_stats = psutil.virtual_memory()
-     memory_total = psutil_stats.total / 1024**3
-     memory_usage = memory_total - (psutil_stats.available / 1024**3)
-     print_memory += f'{memory_type} Memory Usage: {memory_usage:.2f} / {memory_total:.2f} GB\n'
-
-     if torch.cuda.is_available():
-         memory_type = 'GPU'
-         memory_free, memory_total = torch.cuda.mem_get_info()
-         memory_usage = memory_total - memory_free
-         print_memory += f'{memory_type} Memory Usage: {memory_usage / 1024**3:.2f} / {memory_total / 1024**3:.2f} GB\n'
-
-     print_memory = f'---------------\n{print_memory}---------------'
-     return print_memory
-
-
- # clearing the list of documents
- def clear_documents(documents: Iterable[Document]) -> Iterable[Document]:
-     def clear_text(text: str) -> str:
-         lines = text.split('\n')
-         lines = [line for line in lines if len(line.strip()) > 2]
-         text = '\n'.join(lines).strip()
-         return text
-
-     output_documents = []
-     for document in documents:
-         text = clear_text(document.page_content)
-         if len(text) > 10:
-             document.page_content = text
-             output_documents.append(document)
-     return output_documents
-
-
- # ===================== INTERFACE FUNCS =============================
-
-
- # ------------- LLM AND EMBEDDING MODELS LOADING ------------------------
-
- # function for downloading a file from a URL while displaying tqdm and gradio progress bars
- def download_file(file_url: str, file_path: Union[str, Path]) -> None:
-     response = requests.get(file_url, stream=True)
-     if response.status_code != 200:
-         raise Exception(f'The file is not available for download at the link: {file_url}')
-     total_size = int(response.headers.get('content-length', 0))
-     progress_tqdm = tqdm(desc='Loading GGUF file', total=total_size, unit='iB', unit_scale=True)
-     progress_gradio = gr.Progress()
-     completed_size = 0
-     with open(file_path, 'wb') as file:
-         for data in response.iter_content(chunk_size=4096):
-             size = file.write(data)
-             progress_tqdm.update(size)
-             completed_size += size
-             desc = f'Loading GGUF file, {completed_size/1024**3:.3f}/{total_size/1024**3:.3f} GB'
-             progress_gradio(completed_size/total_size, desc=desc)
-
-
- # loading and initializing the GGUF model
- def load_llm_model(model_repo: str, model_file: str) -> Tuple[LLM_MODEL_DICT, str, str]:
-     llm_model = None
-     load_log = ''
-     support_system_role = False
-
-     if isinstance(model_file, list):
-         load_log += 'No model selected\n'
-         return llm_model, load_log
-     if '(' in model_file:
-         model_file = model_file.split('(')[0].rstrip()
-
-     progress = gr.Progress()
-     progress(0.3, desc='Step 1/2: Download the GGUF file')
-     model_path = LLM_MODELS_PATH / model_file
-
-     if model_path.is_file():
-         load_log += f'Model {model_file} already loaded, reinitializing\n'
-     else:
-         try:
-             gguf_url = f'https://huggingface.co/{model_repo}/resolve/main/{model_file}'
-             download_file(gguf_url, model_path)
-             load_log += f'Model {model_file} loaded\n'
-         except Exception as ex:
-             model_path = ''
-             load_log += f'Error loading model, error code:\n{ex}\n'
-
-     if model_path:
-         progress(0.7, desc='Step 2/2: Initialize the model')
-         try:
-             llm_model = Llama(model_path=str(model_path), n_gpu_layers=-1, verbose=False)
-             support_system_role = 'System role not supported' not in llm_model.metadata['tokenizer.chat_template']
-             load_log += f'Model {model_file} initialized, max context size is {llm_model.n_ctx()} tokens\n'
-         except Exception as ex:
-             load_log += f'Error initializing model, error code:\n{ex}\n'
-
-     llm_model = {'model': llm_model}
-     return llm_model, support_system_role, load_log
-
-
- # loading and initializing the embedding model
- def load_embed_model(model_repo: str) -> Tuple[Dict[str, HuggingFaceEmbeddings], str]:
-     embed_model = None
-     load_log = ''
-
-     if isinstance(model_repo, list):
-         load_log = 'No model selected'
-         return embed_model, load_log
-
-     progress = gr.Progress()
-     folder_name = model_repo.replace('/', '_')
-     folder_path = EMBED_MODELS_PATH / folder_name
-     if Path(folder_path).is_dir():
-         load_log += f'Reinitializing model {model_repo} \n'
-     else:
-         progress(0.5, desc='Step 1/2: Download model repository')
-         snapshot_download(
-             repo_id=model_repo,
-             local_dir=folder_path,
-             ignore_patterns='*.h5',
-         )
-         load_log += f'Model {model_repo} loaded\n'
-
-     progress(0.7, desc='Step 2/2: Initialize the model')
-     model_kwargs = {'device': 'cuda' if torch.cuda.is_available() else 'cpu'}
-     embed_model = HuggingFaceEmbeddings(
-         model_name=str(folder_path),
-         model_kwargs=model_kwargs,
-         # encode_kwargs={'normalize_embeddings': True},
-     )
-     load_log += f'Embeddings model {model_repo} initialized\n'
-     load_log += f'Please upload documents and initialize database again\n'
-     embed_model = {'embed_model': embed_model}
-     return embed_model, load_log
-
-
- # adding a new HF repository new_model_repo to the current list of model_repos
- def add_new_model_repo(new_model_repo: str, model_repos: List[str]) -> Tuple[gr.Dropdown, str]:
-     load_log = ''
-     repo = new_model_repo.strip()
-     if repo:
-         repo = repo.split('/')[-2:]
-         if len(repo) == 2:
-             repo = '/'.join(repo).split('?')[0]
-             if repo_exists(repo) and repo not in model_repos:
-                 model_repos.insert(0, repo)
-                 load_log += f'Model repository {repo} successfully added\n'
-             else:
-                 load_log += 'Invalid HF repository name or model already in the list\n'
-         else:
-             load_log += 'Invalid link to HF repository\n'
-     else:
-         load_log += 'Empty line in HF repository field\n'
-     model_repo_dropdown = gr.Dropdown(choices=model_repos, value=model_repos[0])
-     return model_repo_dropdown, load_log
-
-
- # get list of GGUF models from HF repository
- def get_gguf_model_names(model_repo: str) -> gr.Dropdown:
-     repo_files = list(list_repo_tree(model_repo))
-     repo_files = [file for file in repo_files if file.path.endswith('.gguf')]
-     model_paths = [f'{file.path} ({file.size / 1000 ** 3:.2f}G)' for file in repo_files]
-     model_paths_dropdown = gr.Dropdown(
-         choices=model_paths,
-         value=model_paths[0],
-         label='GGUF model file',
-     )
-     return model_paths_dropdown
-
-
- # delete model files and folders to clear space except for the current model gguf_filename
- def clear_llm_folder(gguf_filename: str) -> None:
-     if gguf_filename is None:
-         gr.Info(f'The name of the model file that does not need to be deleted is not selected.')
-         return
-     if '(' in gguf_filename:
-         gguf_filename = gguf_filename.split('(')[0].rstrip()
-     for path in LLM_MODELS_PATH.iterdir():
-         if path.name == gguf_filename:
-             continue
-         if path.is_file():
-             path.unlink(missing_ok=True)
-     gr.Info(f'All files removed from directory {LLM_MODELS_PATH} except {gguf_filename}')
-
-
- # delete model folders to clear space except for the current model model_folder_name
- def clear_embed_folder(model_repo: str) -> None:
-     if model_repo is None:
-         gr.Info(f'The name of the model that does not need to be deleted is not selected.')
-         return
-     model_folder_name = model_repo.replace('/', '_')
-     for path in EMBED_MODELS_PATH.iterdir():
-         if path.name == model_folder_name:
-             continue
-         if path.is_dir():
-             rmtree(path, ignore_errors=True)
-     gr.Info(f'All directories have been removed from the {EMBED_MODELS_PATH} directory except {model_folder_name}')
-
-
- # ------------------------ YOUTUBE ------------------------
-
- # function to check availability of subtitles, if manual or automatic are available - returns True and logs
- # if subtitles are not available - returns False and logs
- def check_subtitles_available(yt_video_link: str, target_lang: str) -> Tuple[bool, str]:
-     video_id = yt_video_link.split('watch?v=')[-1].split('&')[0]
-     load_log = ''
-     available = True
-     try:
-         transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
-         try:
-             transcript = transcript_list.find_transcript([target_lang])
-             if transcript.is_generated:
-                 load_log += f'Automatic subtitles will be loaded, manual ones are not available for video {yt_video_link}\n'
-             else:
-                 load_log += f'Manual subtitles will be downloaded for the video {yt_video_link}\n'
-         except NoTranscriptFound:
-             load_log += f'Subtitle language {target_lang} is not available for video {yt_video_link}\n'
-             available = False
-     except TranscriptsDisabled:
-         load_log += f'Invalid video url ({yt_video_link}) or current server IP is blocked for YouTube\n'
-         available = False
-     return available, load_log
-
-
- # ------------- UPLOADING DOCUMENTS FOR RAG ------------------------
-
- # extract documents (in langchain Documents format) from downloaded files
- def load_documents_from_files(upload_files: List[str]) -> Tuple[List[Document], str]:
-     load_log = ''
-     documents = []
-     for upload_file in upload_files:
-         file_extension = f".{upload_file.split('.')[-1]}"
-         if file_extension in LOADER_CLASSES:
-             loader_class = LOADER_CLASSES[file_extension]
-             loader_kwargs = {}
-             if file_extension == '.csv':
-                 with open(upload_file) as csvfile:
-                     delimiter = csv.Sniffer().sniff(csvfile.read(4096)).delimiter
-                 loader_kwargs = {'csv_args': {'delimiter': delimiter}}
-             try:
-                 load_documents = loader_class(upload_file, **loader_kwargs).load()
-                 documents.extend(load_documents)
-             except Exception as ex:
-                 load_log += f'Error uploading file {upload_file}\n'
-                 load_log += f'Error code: {ex}\n'
-                 continue
-         else:
-             load_log += f'Unsupported file format {upload_file}\n'
-             continue
-     return documents, load_log
-
-
- # extracting documents (in langchain Documents format) from WEB links
- def load_documents_from_links(
-     web_links: str,
-     subtitles_lang: str,
- ) -> Tuple[List[Document], str]:
-
-     load_log = ''
-     documents = []
-     loader_class_kwargs = {}
-     web_links = [web_link.strip() for web_link in web_links.split('\n') if web_link.strip()]
-     for web_link in web_links:
-         if 'youtube.com' in web_link:
-             available, log = check_subtitles_available(web_link, subtitles_lang)
-             load_log += log
-             if not available:
-                 continue
-             loader_class = LOADER_CLASSES['youtube'].from_youtube_url
-             loader_class_kwargs = {'language': subtitles_lang}
-         else:
-             loader_class = LOADER_CLASSES['web']
-
-         try:
-             if requests.get(web_link).status_code != 200:
-                 load_log += f'The link is not available for Python requests: {web_link}\n'
-                 continue
-             load_documents = loader_class(web_link, **loader_class_kwargs).load()
-             if len(load_documents) == 0:
-                 load_log += f'No text chunks were found at the link: {web_link}\n'
-                 continue
-             documents.extend(load_documents)
-         except MissingSchema:
-             load_log += f'Invalid link: {web_link}\n'
-             continue
-         except Exception as ex:
-             load_log += f'Error loading data by web loader at link: {web_link}\n'
-             load_log += f'Error code: {ex}\n'
-             continue
-     return documents, load_log
-
-
- # uploading files and generating documents and databases
- def load_documents_and_create_db(
-     upload_files: Optional[List[str]],
-     web_links: str,
-     subtitles_lang: str,
-     chunk_size: int,
-     chunk_overlap: int,
-     embed_model_dict: EMBED_MODEL_DICT,
- ) -> Tuple[List[Document], Optional[VectorStore], str]:
-
-     load_log = ''
-     all_documents = []
-     db = None
-     progress = gr.Progress()
-
-     embed_model = embed_model_dict.get('embed_model')
-     if embed_model is None:
-         load_log += 'Embeddings model not initialized, DB cannot be created'
-         return all_documents, db, load_log
-
-     if upload_files is None and not web_links:
-         load_log = 'No files or links selected'
-         return all_documents, db, load_log
-
-     if upload_files is not None:
-         progress(0.3, desc='Step 1/2: Upload documents from files')
-         docs, log = load_documents_from_files(upload_files)
-         all_documents.extend(docs)
-         load_log += log
-
-     if web_links:
-         progress(0.3 if upload_files is None else 0.5, desc='Step 1/2: Upload documents via links')
-         docs, log = load_documents_from_links(web_links, subtitles_lang)
-         all_documents.extend(docs)
-         load_log += log
-
-     if len(all_documents) == 0:
-         load_log += 'Download was interrupted because no documents were extracted\n'
-         load_log += 'RAG mode cannot be activated'
-         return all_documents, db, load_log
-
-     load_log += f'Documents loaded: {len(all_documents)}\n'
-     text_splitter = RecursiveCharacterTextSplitter(
-         chunk_size=chunk_size,
-         chunk_overlap=chunk_overlap,
-     )
-     documents = text_splitter.split_documents(all_documents)
-     documents = clear_documents(documents)
-     load_log += f'Documents are divided, number of text chunks: {len(documents)}\n'
-
-     progress(0.7, desc='Step 2/2: Initialize DB')
-     db = FAISS.from_documents(documents=documents, embedding=embed_model)
-     load_log += 'DB is initialized, RAG mode is activated and can be activated in the Chatbot tab'
-     return documents, db, load_log
-
-
- # ------------------ CHATBOT FUNCS ------------------------
-
- # adding a user message to the chat bot window
- def user_message_to_chatbot(user_message: str, chatbot: CHAT_HISTORY) -> Tuple[str, CHAT_HISTORY]:
-     chatbot.append([user_message, None])
-     return '', chatbot
-
-
- # formatting prompt with adding context if DB is available and RAG mode is enabled
- def update_user_message_with_context(
-     chatbot: CHAT_HISTORY,
-     rag_mode: bool,
-     db: VectorStore,
-     k: Union[int, str],
-     score_threshold: float,
- ) -> Tuple[str, CHAT_HISTORY]:
-
-     user_message = chatbot[-1][0]
-     user_message_with_context = ''
-     if db is not None and rag_mode and user_message.strip():
-         if k == 'all':
-             k = len(db.docstore._dict)
-         docs_and_distances = db.similarity_search_with_relevance_scores(
-             user_message,
-             k=k,
-             score_threshold=score_threshold,
-         )
-         if len(docs_and_distances) > 0:
-             retriever_context = '\n\n'.join([doc[0].page_content for doc in docs_and_distances])
-             user_message_with_context = CONTEXT_TEMPLATE.format(
-                 user_message=user_message,
-                 context=retriever_context,
-             )
-     return user_message_with_context
-
-
- # model response generation
- def get_llm_response(
-     chatbot: CHAT_HISTORY,
-     llm_model_dict: LLM_MODEL_DICT,
-     user_message_with_context: str,
-     rag_mode: bool,
-     system_prompt: str,
-     support_system_role: bool,
-     history_len: int,
-     do_sample: bool,
-     *generate_args,
- ) -> CHAT_HISTORY:
-
-     user_message = chatbot[-1][0]
-     if not user_message.strip():
-         yield chatbot[:-1]
-         return None
-
-     if rag_mode:
-         if user_message_with_context:
-             user_message = user_message_with_context
-         else:
-             gr.Info((
-                 f'No documents relevant to the query were found, generation in RAG mode is not possible.\n'
-                 f'Try reducing search_score_threshold or disable RAG mode for normal generation'
-             ))
-             yield chatbot[:-1]
-             return None
-
-     llm_model = llm_model_dict.get('model')
-     gen_kwargs = dict(zip(GENERATE_KWARGS.keys(), generate_args))
-     gen_kwargs['top_k'] = int(gen_kwargs['top_k'])
-     if not do_sample:
-         gen_kwargs['top_p'] = 0.0
-         gen_kwargs['top_k'] = 1
-         gen_kwargs['repeat_penalty'] = 1.0
-
-     messages = []
-     if support_system_role and system_prompt:
-         messages.append({'role': 'system', 'content': system_prompt})
-
-     if history_len != 0:
-         for user_msg, bot_msg in chatbot[:-1][-history_len:]:
-             messages.append({'role': 'user', 'content': user_msg})
-             messages.append({'role': 'assistant', 'content': bot_msg})
-
-     messages.append({'role': 'user', 'content': user_message})
-     stream_response = llm_model.create_chat_completion(
-         messages=messages,
-         stream=True,
-         **gen_kwargs,
-     )
-     try:
-         chatbot[-1][1] = ''
-         for chunk in stream_response:
-             token = chunk['choices'][0]['delta'].get('content')
-             if token is not None:
-                 chatbot[-1][1] += token
-                 yield chatbot
-     except Exception as ex:
-         gr.Info(f'Error generating response, error code: {ex}')
      yield chatbot
 
+ import csv
+ from pathlib import Path
+ from shutil import rmtree
+ from typing import List, Tuple, Dict, Union, Optional, Any, Iterable
+ from tqdm import tqdm
+
+ import psutil
+ import requests
+ from requests.exceptions import MissingSchema
+
+ import torch
+ import gradio as gr
+
+ from llama_cpp import Llama
+ from youtube_transcript_api import YouTubeTranscriptApi, NoTranscriptFound, TranscriptsDisabled
+ from huggingface_hub import hf_hub_download, list_repo_tree, list_repo_files, repo_info, repo_exists, snapshot_download
+
+ from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
+ from langchain_community.vectorstores import FAISS
+ from langchain_huggingface import HuggingFaceEmbeddings
+
+ # imports for annotations
+ from langchain.docstore.document import Document
+ from langchain_core.embeddings import Embeddings
+ from langchain_core.vectorstores import VectorStore
+
+ from config import (
+     LLM_MODELS_PATH,
+     EMBED_MODELS_PATH,
+     GENERATE_KWARGS,
+     LOADER_CLASSES,
+     CONTEXT_TEMPLATE,
+ )
+
+
+ # type annotations
+ CHAT_HISTORY = List[Tuple[Optional[str], Optional[str]]]
+ LLM_MODEL_DICT = Dict[str, Llama]
+ EMBED_MODEL_DICT = Dict[str, Embeddings]
+
+
+ # ===================== ADDITIONAL FUNCS =======================
+
+ # getting disk, CPU and GPU memory usage
+ def get_memory_usage() -> str:
+     print_memory = ''
+
+     memory_type = 'Disk'
+     psutil_stats = psutil.disk_usage('.')
+     memory_total = psutil_stats.total / 1024**3
+     memory_usage = psutil_stats.used / 1024**3
+     print_memory += f'{memory_type} Memory Usage: {memory_usage:.2f} / {memory_total:.2f} GB\n'
+
+     memory_type = 'CPU'
+     psutil_stats = psutil.virtual_memory()
+     memory_total = psutil_stats.total / 1024**3
+     memory_usage = memory_total - (psutil_stats.available / 1024**3)
+     print_memory += f'{memory_type} Memory Usage: {memory_usage:.2f} / {memory_total:.2f} GB\n'
+
+     if torch.cuda.is_available():
+         memory_type = 'GPU'
+         memory_free, memory_total = torch.cuda.mem_get_info()
+         memory_usage = memory_total - memory_free
+         print_memory += f'{memory_type} Memory Usage: {memory_usage / 1024**3:.2f} / {memory_total / 1024**3:.2f} GB\n'
+
+     print_memory = f'---------------\n{print_memory}---------------'
+     return print_memory
+
+
+ # clearing the list of documents
+ def clear_documents(documents: Iterable[Document]) -> Iterable[Document]:
+     def clear_text(text: str) -> str:
+         lines = text.split('\n')
+         lines = [line for line in lines if len(line.strip()) > 2]
+         text = '\n'.join(lines).strip()
+         return text
+
+     output_documents = []
+     for document in documents:
+         text = clear_text(document.page_content)
+         if len(text) > 10:
+             document.page_content = text
+             output_documents.append(document)
+     return output_documents
+
+
+ # ===================== INTERFACE FUNCS =============================
+
+
+ # ------------- LLM AND EMBEDDING MODELS LOADING ------------------------
+
+ # downloading a file from a URL while displaying tqdm and gradio progress bars
+ def download_file(file_url: str, file_path: Union[str, Path]) -> None:
+     response = requests.get(file_url, stream=True)
+     if response.status_code != 200:
+         raise Exception(f'The file is not available for download at the link: {file_url}')
+     total_size = int(response.headers.get('content-length', 0))
+     progress_tqdm = tqdm(desc='Loading GGUF file', total=total_size, unit='iB', unit_scale=True)
+     progress_gradio = gr.Progress()
+     completed_size = 0
+     with open(file_path, 'wb') as file:
+         for data in response.iter_content(chunk_size=4096):
+             size = file.write(data)
+             progress_tqdm.update(size)
+             completed_size += size
+             desc = f'Loading GGUF file, {completed_size/1024**3:.3f}/{total_size/1024**3:.3f} GB'
+             progress_gradio(completed_size/total_size, desc=desc)
+
+
+ # loading and initializing the GGUF model
+ def load_llm_model(model_repo: str, model_file: str) -> Tuple[LLM_MODEL_DICT, str, str]:
+     llm_model = None
+     load_log = ''
+     support_system_role = False
+
+     if isinstance(model_file, list):
+         load_log += 'No model selected\n'
+         return {'model': llm_model}, support_system_role, load_log
+
+     if '(' in model_file:
+         model_file = model_file.split('(')[0].rstrip()
+
+     progress = gr.Progress()
+     progress(0.3, desc='Step 1/2: Download the GGUF file')
+     model_path = LLM_MODELS_PATH / model_file
+
+     if model_path.is_file():
+         load_log += f'Model {model_file} already loaded, reinitializing\n'
+     else:
+         try:
+             gguf_url = f'https://huggingface.co/{model_repo}/resolve/main/{model_file}'
+             download_file(gguf_url, model_path)
+             load_log += f'Model {model_file} loaded\n'
+         except Exception as ex:
+             model_path = ''
+             load_log += f'Error loading model, error code:\n{ex}\n'
+
+     if model_path:
+         progress(0.7, desc='Step 2/2: Initialize the model')
+         try:
+             llm_model = Llama(model_path=str(model_path), n_gpu_layers=-1, verbose=False)
+             support_system_role = 'System role not supported' not in llm_model.metadata['tokenizer.chat_template']
+             load_log += f'Model {model_file} initialized, max context size is {llm_model.n_ctx()} tokens\n'
+         except Exception as ex:
+             load_log += f'Error initializing model, error code:\n{ex}\n'
+
+     llm_model = {'model': llm_model}
+     return llm_model, support_system_role, load_log
+
+
+ # loading and initializing the embedding model
+ def load_embed_model(model_repo: str) -> Tuple[Dict[str, HuggingFaceEmbeddings], str]:
+     embed_model = None
+     load_log = ''
+
+     if isinstance(model_repo, list):
+         load_log = 'No model selected'
+         return embed_model, load_log
+
+     progress = gr.Progress()
+     folder_name = model_repo.replace('/', '_')
+     folder_path = EMBED_MODELS_PATH / folder_name
+     if Path(folder_path).is_dir():
+         load_log += f'Reinitializing model {model_repo} \n'
+     else:
+         progress(0.5, desc='Step 1/2: Download model repository')
+         snapshot_download(
+             repo_id=model_repo,
+             local_dir=folder_path,
+             ignore_patterns='*.h5',
+         )
+         load_log += f'Model {model_repo} loaded\n'
+
+     progress(0.7, desc='Step 2/2: Initialize the model')
+     model_kwargs = {'device': 'cuda' if torch.cuda.is_available() else 'cpu'}
+     embed_model = HuggingFaceEmbeddings(
+         model_name=str(folder_path),
+         model_kwargs=model_kwargs,
+         # encode_kwargs={'normalize_embeddings': True},
+     )
+     load_log += f'Embeddings model {model_repo} initialized\n'
+     load_log += f'Please upload documents and initialize database again\n'
+     embed_model = {'embed_model': embed_model}
+     return embed_model, load_log
+
+
+ # adding a new HF repository new_model_repo to the current list of model_repos
+ def add_new_model_repo(new_model_repo: str, model_repos: List[str]) -> Tuple[gr.Dropdown, str]:
+     load_log = ''
+     repo = new_model_repo.strip()
+     if repo:
+         repo = repo.split('/')[-2:]
+         if len(repo) == 2:
+             repo = '/'.join(repo).split('?')[0]
+             if repo_exists(repo) and repo not in model_repos:
+                 model_repos.insert(0, repo)
+                 load_log += f'Model repository {repo} successfully added\n'
+             else:
+                 load_log += 'Invalid HF repository name or model already in the list\n'
+         else:
+             load_log += 'Invalid link to HF repository\n'
+     else:
+         load_log += 'Empty line in HF repository field\n'
+     model_repo_dropdown = gr.Dropdown(choices=model_repos, value=model_repos[0])
+     return model_repo_dropdown, load_log
+
+
+ # get list of GGUF models from HF repository
+ def get_gguf_model_names(model_repo: str) -> gr.Dropdown:
+     repo_files = list(list_repo_tree(model_repo))
+     repo_files = [file for file in repo_files if file.path.endswith('.gguf')]
+     model_paths = [f'{file.path} ({file.size / 1000 ** 3:.2f}G)' for file in repo_files]
+     model_paths_dropdown = gr.Dropdown(
+         choices=model_paths,
+         value=model_paths[0],
+         label='GGUF model file',
+     )
+     return model_paths_dropdown
+
+
+ # delete model files and folders to clear space except for the current model gguf_filename
+ def clear_llm_folder(gguf_filename: str) -> None:
+     if gguf_filename is None:
+         gr.Info(f'The name of the model file that does not need to be deleted is not selected.')
+         return
+     if '(' in gguf_filename:
+         gguf_filename = gguf_filename.split('(')[0].rstrip()
+     for path in LLM_MODELS_PATH.iterdir():
+         if path.name == gguf_filename:
+             continue
+         if path.is_file():
+             path.unlink(missing_ok=True)
+     gr.Info(f'All files removed from directory {LLM_MODELS_PATH} except {gguf_filename}')
+
+
+ # delete model folders to clear space except for the current model model_folder_name
+ def clear_embed_folder(model_repo: str) -> None:
+     if model_repo is None:
+         gr.Info(f'The name of the model that does not need to be deleted is not selected.')
+         return
+     model_folder_name = model_repo.replace('/', '_')
+     for path in EMBED_MODELS_PATH.iterdir():
+         if path.name == model_folder_name:
+             continue
+         if path.is_dir():
+             rmtree(path, ignore_errors=True)
+     gr.Info(f'All directories have been removed from the {EMBED_MODELS_PATH} directory except {model_folder_name}')
+
+
+ # ------------------------ YOUTUBE ------------------------
+
+ # function to check availability of subtitles, if manual or automatic are available - returns True and logs
+ # if subtitles are not available - returns False and logs
+ def check_subtitles_available(yt_video_link: str, target_lang: str) -> Tuple[bool, str]:
+     video_id = yt_video_link.split('watch?v=')[-1].split('&')[0]
+     load_log = ''
+     available = True
+     try:
+         transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
+         try:
+             transcript = transcript_list.find_transcript([target_lang])
+             if transcript.is_generated:
+                 load_log += f'Automatic subtitles will be loaded, manual ones are not available for video {yt_video_link}\n'
+             else:
+                 load_log += f'Manual subtitles will be downloaded for the video {yt_video_link}\n'
+         except NoTranscriptFound:
+             load_log += f'Subtitle language {target_lang} is not available for video {yt_video_link}\n'
+             available = False
+     except TranscriptsDisabled:
+         load_log += f'Invalid video url ({yt_video_link}) or current server IP is blocked for YouTube\n'
+         available = False
+     return available, load_log
+
+
+ # ------------- UPLOADING DOCUMENTS FOR RAG ------------------------
+
+ # extract documents (in langchain Documents format) from downloaded files
+ def load_documents_from_files(upload_files: List[str]) -> Tuple[List[Document], str]:
+     load_log = ''
+     documents = []
+     for upload_file in upload_files:
+         file_extension = f".{upload_file.split('.')[-1]}"
+         if file_extension in LOADER_CLASSES:
+             loader_class = LOADER_CLASSES[file_extension]
+             loader_kwargs = {}
+             if file_extension == '.csv':
+                 with open(upload_file) as csvfile:
+                     delimiter = csv.Sniffer().sniff(csvfile.read(4096)).delimiter
+                 loader_kwargs = {'csv_args': {'delimiter': delimiter}}
+             try:
+                 load_documents = loader_class(upload_file, **loader_kwargs).load()
+                 documents.extend(load_documents)
+             except Exception as ex:
+                 load_log += f'Error uploading file {upload_file}\n'
+                 load_log += f'Error code: {ex}\n'
+                 continue
+         else:
+             load_log += f'Unsupported file format {upload_file}\n'
+             continue
+     return documents, load_log
+
+
+ # extracting documents (in langchain Documents format) from WEB links
+ def load_documents_from_links(
+     web_links: str,
+     subtitles_lang: str,
+ ) -> Tuple[List[Document], str]:
+
+     load_log = ''
+     documents = []
+     loader_class_kwargs = {}
+     web_links = [web_link.strip() for web_link in web_links.split('\n') if web_link.strip()]
+     for web_link in web_links:
+         if 'youtube.com' in web_link:
+             available, log = check_subtitles_available(web_link, subtitles_lang)
+             load_log += log
+             if not available:
+                 continue
+             loader_class = LOADER_CLASSES['youtube'].from_youtube_url
+             loader_class_kwargs = {'language': subtitles_lang}
+         else:
+             loader_class = LOADER_CLASSES['web']
+
+         try:
+             if requests.get(web_link).status_code != 200:
+                 load_log += f'The link is not available for Python requests: {web_link}\n'
+                 continue
+             load_documents = loader_class(web_link, **loader_class_kwargs).load()
+             if len(load_documents) == 0:
+                 load_log += f'No text chunks were found at the link: {web_link}\n'
+                 continue
+             documents.extend(load_documents)
+         except MissingSchema:
+             load_log += f'Invalid link: {web_link}\n'
+             continue
+         except Exception as ex:
+             load_log += f'Error loading data by web loader at link: {web_link}\n'
+             load_log += f'Error code: {ex}\n'
+             continue
+     return documents, load_log
+
+
+ # uploading files and generating documents and databases
+ def load_documents_and_create_db(
+     upload_files: Optional[List[str]],
+     web_links: str,
+     subtitles_lang: str,
+     chunk_size: int,
+     chunk_overlap: int,
+     embed_model_dict: EMBED_MODEL_DICT,
+ ) -> Tuple[List[Document], Optional[VectorStore], str]:
+
+     load_log = ''
+     all_documents = []
+     db = None
+     progress = gr.Progress()
+
+     embed_model = embed_model_dict.get('embed_model')
+     if embed_model is None:
+         load_log += 'Embeddings model not initialized, DB cannot be created'
+         return all_documents, db, load_log
+
+     if upload_files is None and not web_links:
+         load_log = 'No files or links selected'
+         return all_documents, db, load_log
+
+     if upload_files is not None:
+         progress(0.3, desc='Step 1/2: Upload documents from files')
+         docs, log = load_documents_from_files(upload_files)
+         all_documents.extend(docs)
+         load_log += log
+
+     if web_links:
+         progress(0.3 if upload_files is None else 0.5, desc='Step 1/2: Upload documents via links')
+         docs, log = load_documents_from_links(web_links, subtitles_lang)
+         all_documents.extend(docs)
+         load_log += log
+
+     if len(all_documents) == 0:
+         load_log += 'Download was interrupted because no documents were extracted\n'
+         load_log += 'RAG mode cannot be activated'
+         return all_documents, db, load_log
+
+     load_log += f'Documents loaded: {len(all_documents)}\n'
+     text_splitter = RecursiveCharacterTextSplitter(
+         chunk_size=chunk_size,
+         chunk_overlap=chunk_overlap,
+     )
+     documents = text_splitter.split_documents(all_documents)
+     documents = clear_documents(documents)
+     load_log += f'Documents are divided, number of text chunks: {len(documents)}\n'
+
+     progress(0.7, desc='Step 2/2: Initialize DB')
+     db = FAISS.from_documents(documents=documents, embedding=embed_model)
+     load_log += 'DB is initialized, RAG mode is activated and can be activated in the Chatbot tab'
+     return documents, db, load_log
+
+
+ # ------------------ CHATBOT FUNCS ------------------------
+
+ # adding a user message to the chat bot window
+ def user_message_to_chatbot(user_message: str, chatbot: CHAT_HISTORY) -> Tuple[str, CHAT_HISTORY]:
+     chatbot.append([user_message, None])
+     return '', chatbot
+
+
+ # formatting prompt with adding context if DB is available and RAG mode is enabled
+ def update_user_message_with_context(
+     chatbot: CHAT_HISTORY,
+     rag_mode: bool,
+     db: VectorStore,
+     k: Union[int, str],
+     score_threshold: float,
+ ) -> Tuple[str, CHAT_HISTORY]:
+
+     user_message = chatbot[-1][0]
+     user_message_with_context = ''
+     if db is not None and rag_mode and user_message.strip():
+         if k == 'all':
+             k = len(db.docstore._dict)
+         docs_and_distances = db.similarity_search_with_relevance_scores(
+             user_message,
+             k=k,
+             score_threshold=score_threshold,
+         )
+         if len(docs_and_distances) > 0:
+             retriever_context = '\n\n'.join([doc[0].page_content for doc in docs_and_distances])
+             user_message_with_context = CONTEXT_TEMPLATE.format(
+                 user_message=user_message,
+                 context=retriever_context,
+             )
+     return user_message_with_context
+
+
+ # model response generation
+ def get_llm_response(
+     chatbot: CHAT_HISTORY,
+     llm_model_dict: LLM_MODEL_DICT,
+     user_message_with_context: str,
+     rag_mode: bool,
+     system_prompt: str,
+     support_system_role: bool,
+     history_len: int,
+     do_sample: bool,
+     *generate_args,
+ ) -> CHAT_HISTORY:
+
+     user_message = chatbot[-1][0]
+     if not user_message.strip():
+         yield chatbot[:-1]
+         return None
+
+     if rag_mode:
+         if user_message_with_context:
+             user_message = user_message_with_context
+         else:
+             gr.Info((
+                 f'No documents relevant to the query were found, generation in RAG mode is not possible.\n'
+                 f'Try reducing search_score_threshold or disable RAG mode for normal generation'
+             ))
+             yield chatbot[:-1]
+             return None
+
+     llm_model = llm_model_dict.get('model')
+     gen_kwargs = dict(zip(GENERATE_KWARGS.keys(), generate_args))
+     gen_kwargs['top_k'] = int(gen_kwargs['top_k'])
+     if not do_sample:
+         gen_kwargs['top_p'] = 0.0
+         gen_kwargs['top_k'] = 1
+         gen_kwargs['repeat_penalty'] = 1.0
+
+     messages = []
+     if support_system_role and system_prompt:
+         messages.append({'role': 'system', 'content': system_prompt})
+
+     if history_len != 0:
+         for user_msg, bot_msg in chatbot[:-1][-history_len:]:
+             messages.append({'role': 'user', 'content': user_msg})
+             messages.append({'role': 'assistant', 'content': bot_msg})
+
+     messages.append({'role': 'user', 'content': user_message})
+     stream_response = llm_model.create_chat_completion(
+         messages=messages,
+         stream=True,
+         **gen_kwargs,
+     )
+     try:
+         chatbot[-1][1] = ''
+         for chunk in stream_response:
+             token = chunk['choices'][0]['delta'].get('content')
+             if token is not None:
+                 chatbot[-1][1] += token
+                 yield chatbot
+     except Exception as ex:
+         gr.Info(f'Error generating response, error code: {ex}')
      yield chatbot
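
The substantive change in utils.py is the early return in load_llm_model, which now yields three values ({'model': ...}, support_system_role, load_log) so that it matches the three outputs declared for the listener in app.py even when no model file is selected. The rough sketch below shows how these helpers can be driven directly, only to illustrate the call signatures and return shapes: the repository and GGUF file names are placeholders, GENERATE_KWARGS is assumed to contain top_p, top_k and repeat_penalty in the order the UI sliders pass them, and the gr.Progress calls inside the helpers may be no-ops or warn outside a Gradio event context.

# Rough usage sketch; repo/file names are placeholders and the
# GENERATE_KWARGS contents are assumptions taken from config.py.
from config import GENERATE_KWARGS
import utils

llm_model_dict, support_system_role, load_log = utils.load_llm_model(
    'bartowski/gemma-2-2b-it-GGUF',   # placeholder HF repo
    'gemma-2-2b-it-Q8_0.gguf',        # placeholder GGUF file
)
print(load_log)

chatbot = [['Hello! What can you do?', None]]
for chatbot in utils.get_llm_response(
    chatbot,                 # chat history
    llm_model_dict,          # {'model': Llama(...)}
    '',                      # user_message_with_context (RAG disabled)
    False,                   # rag_mode
    '',                      # system_prompt
    support_system_role,
    0,                       # history_len
    True,                    # do_sample
    *GENERATE_KWARGS.values(),
):
    pass                     # each iteration yields the updated history
print(chatbot[-1][1])        # streamed assistant reply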