Spaces:
Runtime error
Runtime error
Commit
·
82d9634
1
Parent(s):
6a3c13a
[IMP] translate to russian
Browse files- app.py +15 -6
- translator.py +14 -0
app.py
CHANGED
@@ -2,6 +2,8 @@ import warnings
|
|
2 |
|
3 |
from langchain_core._api import LangChainDeprecationWarning
|
4 |
|
|
|
|
|
5 |
warnings.filterwarnings("ignore", category=DeprecationWarning)
|
6 |
warnings.filterwarnings("ignore", category=UserWarning)
|
7 |
warnings.filterwarnings("ignore", category=LangChainDeprecationWarning)
|
@@ -23,7 +25,7 @@ prev_files = None
|
|
23 |
retriever = None
|
24 |
|
25 |
|
26 |
-
def handle_files_and_query(query, files, chunk_overlap=50, token_per_chunk=256, bm_25_answers=200):
|
27 |
results = ""
|
28 |
global prev_files, retriever
|
29 |
files = [f.name for f in files]
|
@@ -51,10 +53,16 @@ def handle_files_and_query(query, files, chunk_overlap=50, token_per_chunk=256,
|
|
51 |
reranked_results = FAISS.from_documents(search_results, embeddings,
|
52 |
distance_strategy=DistanceStrategy.COSINE).similarity_search(query,
|
53 |
k=25)
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
return results
|
59 |
|
60 |
|
@@ -66,7 +74,8 @@ interface = gr.Interface(
|
|
66 |
gr.Slider(minimum=1, maximum=100, value=50, label="Chunk Overlap"),
|
67 |
gr.Slider(minimum=64, maximum=512, value=256, label="Tokens Per Chunk (чем больше - тем бОльшие куски книги "
|
68 |
"сможем находить)"),
|
69 |
-
gr.Slider(minimum=1, maximum=1000, value=200, label="BM25 Answers (чем больше - тем больше будем учитывать неявные смысловые сравнения слов)")
|
|
|
70 |
],
|
71 |
outputs="text",
|
72 |
title="Similarity Search for eksmo books"
|
|
|
2 |
|
3 |
from langchain_core._api import LangChainDeprecationWarning
|
4 |
|
5 |
+
import translator
|
6 |
+
|
7 |
warnings.filterwarnings("ignore", category=DeprecationWarning)
|
8 |
warnings.filterwarnings("ignore", category=UserWarning)
|
9 |
warnings.filterwarnings("ignore", category=LangChainDeprecationWarning)
|
|
|
25 |
retriever = None
|
26 |
|
27 |
|
28 |
+
def handle_files_and_query(query, files, chunk_overlap=50, token_per_chunk=256, bm_25_answers=200, translate_to_ru=False):
|
29 |
results = ""
|
30 |
global prev_files, retriever
|
31 |
files = [f.name for f in files]
|
|
|
53 |
reranked_results = FAISS.from_documents(search_results, embeddings,
|
54 |
distance_strategy=DistanceStrategy.COSINE).similarity_search(query,
|
55 |
k=25)
|
56 |
+
if translate_to_ru:
|
57 |
+
results = "\n".join([
|
58 |
+
f"Source: {re.search(pattern, result.metadata['file_path']).group(0)}\nPage: {result.metadata['page']}\nContent:\n{translator.translate(result.page_content, 'russian')}\n"
|
59 |
+
for result in reranked_results
|
60 |
+
])
|
61 |
+
else:
|
62 |
+
results = "\n".join([
|
63 |
+
f"Source: {re.search(pattern, result.metadata['file_path']).group(0)}\nPage: {result.metadata['page']}\nContent:\n{result.page_content}\n"
|
64 |
+
for result in reranked_results
|
65 |
+
])
|
66 |
return results
|
67 |
|
68 |
|
|
|
74 |
gr.Slider(minimum=1, maximum=100, value=50, label="Chunk Overlap"),
|
75 |
gr.Slider(minimum=64, maximum=512, value=256, label="Tokens Per Chunk (чем больше - тем бОльшие куски книги "
|
76 |
"сможем находить)"),
|
77 |
+
gr.Slider(minimum=1, maximum=1000, value=200, label="BM25 Answers (чем больше - тем больше будем учитывать неявные смысловые сравнения слов)"),
|
78 |
+
gr.Checkbox(label="Translate to Russian", value=False),
|
79 |
],
|
80 |
outputs="text",
|
81 |
title="Similarity Search for eksmo books"
|
translator.py
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import openai
|
2 |
+
|
3 |
+
|
4 |
+
def translate(text, target_lang):
    """Translate ``text`` into ``target_lang`` via the OpenAI chat completions API.

    Args:
        text: The text to translate.
        target_lang: Name of the target language exactly as it should appear
            in the prompt (for example ``'russian'``).

    Returns:
        The message content of the first choice returned by the
        ``gpt-3.5-turbo`` model. Empty or whitespace-only ``text`` is
        returned unchanged without contacting the API.
    """
    # Nothing to translate: skip the network round trip entirely.
    if isinstance(text, str) and not text.strip():
        return text

    # NOTE(review): the client is built with no arguments, so credentials
    # presumably come from the environment -- confirm for the deployment.
    client = openai.Client()
    response = client.chat.completions.create(
        messages=[
            {"role": "system", "content": f"You are AI-translator and you should translate text to {target_lang}"},
            {
                "role": "user",
                "content": f"Please translate this text to {target_lang}: {text}. "
                           f"Answer with translation and no additional information.",
            },
        ],
        model="gpt-3.5-turbo",
    )
    # Only the first completion choice is used.
    return response.choices[0].message.content
|