{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Question retrieval over a FAISS index\n",
    "\n",
    "Loads article questions from a SQLite database, embeds them into a FAISS index, and answers a free-text query by\n",
    "vector search followed by reranking. The last section exposes the same pipeline through a Gradio interface.\n",
    "\n",
    "The notebook is meant to be run top to bottom on a fresh kernel."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install faiss-cpu langchain\n",
    "%pip install -U pydantic==1.10.7"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import sqlite3\n",
    "from contextlib import closing\n",
    "\n",
    "import gradio as gr\n",
    "from langchain.docstore.document import Document\n",
    "from langchain.embeddings import SentenceTransformerEmbeddings\n",
    "from langchain.vectorstores import FAISS\n",
    "\n",
    "import reranking\n",
    "from extract_keywords import canonical_keywords, merge_keywords, tokenize_sentence, extract_keywords, init_keyword_extractor"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Everything a reader might want to tune lives here.\n",
    "SQLITE_FILENAME = \"omnidesk-ai-chatgpt-questions.sqlite\"\n",
    "EMBEDDING_MODEL_NAME = \"multi-qa-MiniLM-L6-cos-v1\"\n",
    "OUTPUT_DIR = 'faiss_qa'  # directory the FAISS index is saved to and loaded from\n",
    "TOP_K = 20  # number of vector-search candidates handed to the reranker"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load questions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_questions(sqlite_filename):\n",
    "    \"\"\"Load the per-article questions stored in a SQLite database.\n",
    "\n",
    "    Selects the rows of the `articles` table whose `articleType` is 'article' and whose\n",
    "    `doNotUse` flag is NULL or 0. The `questions` column of each row is parsed as JSON and\n",
    "    iterated as a list of dicts, each of which must contain a 'question' key.\n",
    "\n",
    "    Two keys are added to every question dict:\n",
    "\n",
    "    * 'query'     - the article section, the article title and the question text joined with\n",
    "                    single spaces and lower-cased;\n",
    "    * 'articleId' - the `articleId` of the row the question came from.\n",
    "\n",
    "    Args:\n",
    "        sqlite_filename: path of the SQLite database file.\n",
    "\n",
    "    Returns:\n",
    "        A flat list with the question dicts of all selected rows.\n",
    "    \"\"\"\n",
    "    all_questions = []\n",
    "    with closing(sqlite3.connect(sqlite_filename)) as db:\n",
    "        db.row_factory = sqlite3.Row\n",
    "        with closing(db.cursor()) as cursor:\n",
    "            # The parentheses are required: AND binds tighter than OR, so without them every\n",
    "            # row with doNotUse = 0 is selected regardless of its articleType.\n",
    "            results = cursor.execute(\n",
    "                \"SELECT id, articleId, title, category, section, questions FROM articles \"\n",
    "                \"WHERE articleType = ? AND (doNotUse IS NULL OR doNotUse = 0)\",\n",
    "                ('article',)\n",
    "            ).fetchall()\n",
    "\n",
    "        for res in results:\n",
    "            # NULL columns come back as None; treat them as empty instead of crashing.\n",
    "            questions = json.loads(res['questions'] or '[]')\n",
    "            context_words = (res['section'] or '').split() + (res['title'] or '').split()\n",
    "\n",
    "            for q in questions:\n",
    "                q['query'] = \" \".join(context_words + q['question'].split()).lower()\n",
    "                q['articleId'] = res['articleId']\n",
    "\n",
    "            all_questions += questions\n",
    "    return all_questions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "questions = load_questions(SQLITE_FILENAME)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One document per question: the enriched query text is embedded, the answer travels as metadata.\n",
    "docs = [\n",
    "    Document(page_content=q['query'], metadata={ 'answer': q['answer'], 'articleId': q['articleId'] })\n",
    "    for q in questions\n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Keyword extraction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "init_keyword_extractor()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Output recorded in an earlier run: ['почта россия трекинг']\n",
    "extract_keywords('пр трекинг')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Build and save the FAISS index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "embeddings = SentenceTransformerEmbeddings(model_name=EMBEDDING_MODEL_NAME)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "db = FAISS.from_documents(docs, embeddings)\n",
    "db.save_local(OUTPUT_DIR)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Query the saved index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reload from disk so the query cells below exercise the saved index.\n",
    "db = FAISS.load_local(OUTPUT_DIR, embeddings)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "query = 'Почему у триггеров Почт России симваольный код не тот?'\n",
    "result_docs = db.similarity_search_with_score(query, k=TOP_K)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# similarity_search_with_score yields (document, score) pairs; only the text goes to the reranker.\n",
    "sentences = [doc[0].page_content for doc in result_docs]\n",
    "score, index = reranking.search(query, sentences)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Output recorded in an earlier run: 'Символьный код доставки Почта России - russian-post.'\n",
    "result_docs[index][0].metadata['answer']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Gradio demo"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def main(query):\n",
    "    \"\"\"Answer a free-text query with the same steps as the cells above.\n",
    "\n",
    "    Fetches TOP_K candidates from the FAISS index held in the global `db`, passes their texts\n",
    "    to `reranking.search`, and returns the 'answer' metadata of the candidate at the index\n",
    "    the reranker reports.\n",
    "    \"\"\"\n",
    "    result_docs = db.similarity_search_with_score(query, k=TOP_K)\n",
    "    sentences = [doc[0].page_content for doc in result_docs]\n",
    "    score, index = reranking.search(query, sentences)\n",
    "    return result_docs[index][0].metadata['answer']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "demo = gr.Interface(fn=main, inputs=\"text\", outputs=\"text\")\n",
    "\n",
    "demo.launch()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}