|
from elasticsearch import Elasticsearch |
|
import os |
|
import json |
|
import requests |
|
|
|
# Elasticsearch connection settings. All four variables are required: a
# missing one raises KeyError as soon as this module is imported.
ES_URL, ES_USER, ES_PASS, ES_CA_CERT = (
    os.environ[name]
    for name in ("ES_URL", "ES_USER", "ES_PASS", "ES_CA_CERT")
)
|
|
|
|
|
class ESGPT:
    """Search an Elasticsearch index and summarize the hits with OpenAI.

    The Elasticsearch connection uses the module-level ``ES_URL``,
    ``ES_USER``, ``ES_PASS`` and ``ES_CA_CERT`` settings. The OpenAI model
    name and API key are read from the ``OPENAI_GPT_ENGINE`` and
    ``OPENAI_API_KEY`` environment variables when an instance is created.
    """

    def __init__(self, index_name):
        """Create the Elasticsearch client and load the OpenAI settings.

        Args:
            index_name: Name of the index used by ``index`` and ``search``.

        Raises:
            KeyError: If ``OPENAI_GPT_ENGINE`` or ``OPENAI_API_KEY`` is
                not set in the environment.
        """
        # NOTE(review): newer elasticsearch-py releases prefer `basic_auth`
        # over `http_auth` -- confirm against the pinned client version
        # before changing this.
        self.es = Elasticsearch(
            ES_URL,
            http_auth=(ES_USER, ES_PASS),
            ca_certs=ES_CA_CERT,
            verify_certs=True,
        )
        self.index_name = index_name
        self.model_engine = os.environ["OPENAI_GPT_ENGINE"]
        self.api_key = os.environ["OPENAI_API_KEY"]

    def index(self, doc_id, doc):
        """Store ``doc`` under ``doc_id`` in this instance's index.

        Args:
            doc_id: Identifier to store the document under.
            doc: Document body passed to the client as ``document``.
        """
        self.es.index(index=self.index_name, id=doc_id, document=doc)

    def search(self, query):
        """Run a ``query_string`` search against this instance's index.

        Args:
            query: Query text placed in the ``query_string`` clause.

        Returns:
            The ``hits.hits`` entry of the Elasticsearch response.
        """
        body = {"query": {"query_string": {"query": query}}}
        results = self.es.search(index=self.index_name, body=body)
        return results["hits"]["hits"]

    def _paper_results_to_text(self, results):
        """Render search hits as plain text, one title/abstract pair each.

        Each hit contributes ``"<title>:\\n<abstract>\\n\\n"``, with the
        abstract cut to its first 100 characters. A field that is missing
        from ``_source`` or is null is rendered as an empty string.

        Args:
            results: Iterable of hits, each a mapping with a ``_source``
                mapping.

        Returns:
            The concatenated text, or ``""`` when ``results`` is empty.
        """
        parts = []
        for paper in results:
            source = paper["_source"]
            # `or ""` also covers fields that are present but null, which
            # would otherwise print "None" or fail on slicing.
            title = source.get("title") or ""
            # The original tested for the misspelled key "abctract", so the
            # abstract was never included in the output.
            abstract = source.get("abstract") or ""
            parts.append(f"{title}:\n{abstract[:100]}\n\n")
        return "".join(parts)

    def summarize(self, query, results):
        """Ask the OpenAI completions endpoint to summarize search hits.

        The hits are rendered with ``_paper_results_to_text``. The first 500
        characters of that text are printed, and the first 1000 characters
        are embedded in the prompt. The request is sent with streaming
        enabled in both the API payload and the HTTP call.

        Args:
            query: The search query the results belong to.
            results: Hits as returned by ``search``.

        Returns:
            The ``requests`` response object, opened with ``stream=True``.
            No status check is performed here and the body is not read.
        """
        results_text = self._paper_results_to_text(results) or "No results found"

        print(results_text[:500])

        body = {
            "model": self.model_engine,
            "prompt": f"Please summarize the following search results for query: {query}:\n{results_text[:1000]}",
            "max_tokens": 1000,
            "n": 1,
            "stop": None,
            "temperature": 0.5,
            "stream": True,
        }

        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.api_key}",
        }

        # NOTE(review): no `timeout` is passed, so an unresponsive server
        # can block this call indefinitely -- consider adding one.
        resp = requests.post(
            "https://api.openai.com/v1/completions",
            headers=headers,
            data=json.dumps(body),
            stream=True,
        )
        return resp
|
|