omnidesk-ai-test / list_questions.py
makcrx
5
e42dbc0
import sqlite3, json
from contextlib import closing
from extract_keywords import extract_keywords
# Characters that are removed outright when a query is normalised.
punctuation = "!\"#'(),:;?[]^`}{"
# Characters that are replaced by a single space when a query is normalised.
# '[' and ']' are listed here too, but str.maketrans applies the deletion set
# last, so in practice they are removed rather than replaced by a space.
punctuation2 = "-/&._~+*=@<>[]\\"
# Translation table for str.translate(): separators -> ' ', punctuation -> removed.
remove_punctuation = str.maketrans(
    punctuation2,
    " " * len(punctuation2),
    punctuation,
)
def load_questions(sqlite_filename):
    """Load the questions of every usable article from a SQLite database.

    Selects rows of the ``articles`` table whose ``articleType`` is
    ``'article'`` and whose ``doNotUse`` flag is NULL or 0. The ``questions``
    column of each row is parsed as JSON and is expected to hold a list of
    dicts that each carry a ``'question'`` key. Every such dict is extended
    in place with:

    * ``'query'`` -- the section, title and question text joined with single
      spaces, passed through ``remove_punctuation`` and lower-cased;
    * ``'articleId'`` -- the ``articleId`` of the row it came from.

    For rows whose section is 'служебная информация' neither the section nor
    the title is included in the query. Rows with a NULL/empty ``questions``
    column are skipped; NULL ``section``/``title`` values count as empty.

    Args:
        sqlite_filename: Path of the SQLite database file.

    Returns:
        A flat list containing the question dicts of all selected articles.
    """
    with closing(sqlite3.connect(sqlite_filename)) as db:
        db.row_factory = sqlite3.Row
        with closing(db.cursor()) as cursor:
            # The parentheses are essential: in SQL, AND binds tighter than
            # OR, so without them every row with doNotUse = 0 would be
            # returned regardless of its articleType.
            rows = cursor.execute(
                "SELECT id, articleId, title, category, section, questions "
                "FROM articles "
                "WHERE articleType = ? AND (doNotUse IS NULL OR doNotUse = 0)",
                ('article',),
            ).fetchall()

    # fetchall() has materialised the rows, so they remain usable after the
    # cursor and connection are closed.
    all_questions = []
    for row in rows:
        raw_questions = row['questions']
        if not raw_questions:
            # Nothing to parse for this article.
            continue

        section = (row['section'] or '').lower()
        title = (row['title'] or '').lower()
        if section == 'служебная информация':
            section = ''
            title = ''
        # Identical for every question of this article, so split only once.
        prefix_words = section.split() + title.split()

        questions = json.loads(raw_questions)
        for q in questions:
            words = prefix_words + q['question'].split()
            q['query'] = " ".join(words).translate(remove_punctuation).lower()
            q['articleId'] = row['articleId']
        all_questions += questions
    return all_questions
#print("Loading questions from db...")
#questions = load_questions("omnidesk-ai-chatgpt-questions.sqlite")
#for q in questions:
# keywords = extract_keywords(q['query'])
# if (len(keywords) == 0):
# print(q)
# break