|
import openai |
|
import tiktoken |
|
|
|
import numpy as np |
|
import concurrent |
|
import collections |
|
import threading |
|
import datetime |
|
import time |
|
import pytz |
|
import json |
|
import os |
|
|
|
# Load every OpenAI API key from the `API_KEYs` environment variable, which is
# expected to hold one key per line. Surrounding whitespace and blank lines are
# dropped so that a trailing newline in the secret does not produce an empty,
# unusable key that the key rotation in `ask` could later switch to.
_raw_api_keys = os.getenv('API_KEYs')
if _raw_api_keys is None:
    raise RuntimeError("environment variable 'API_KEYs' is not set; expected one OpenAI API key per line")
openai.api_keys = [key.strip() for key in _raw_api_keys.splitlines() if key.strip()]
if not openai.api_keys:
    raise RuntimeError("environment variable 'API_KEYs' does not contain any API key")

# Start with the first key; `ask` moves on to the next one on quota errors.
openai.api_key = openai.api_keys[0]
|
|
|
|
|
# Every timestamp printed to the log or shown in the UI is rendered in this zone.
timezone = pytz.timezone('Asia/Shanghai')


def timestamp2string(timestamp):
    """Format a POSIX timestamp as 'YYYY-MM-DD HH:MM:SS' in `timezone`."""
    local_moment = datetime.datetime.fromtimestamp(timestamp)
    zoned_moment = local_moment.astimezone(timezone)
    return zoned_moment.strftime('%Y-%m-%d %H:%M:%S')
|
|
|
def num_tokens_from_messages(messages, model="gpt-3.5-turbo"):
    """Return the number of tokens used by a list of chat messages.

    Args:
        messages: Iterable of dicts mapping field names (e.g. "role",
            "content", "name") to string values.
        model: Model name used to pick the tokenizer. Only "gpt-3.5-turbo"
            is supported.

    Returns:
        The token count as an int. Values that the tokenizer refuses to
        encode are estimated from the tokens-per-character ratio seen so far.

    Raises:
        NotImplementedError: If `model` is not "gpt-3.5-turbo".
    """
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        # Unknown model name: fall back to the cl100k_base encoding.
        encoding = tiktoken.get_encoding("cl100k_base")
    if model == "gpt-3.5-turbo":
        num_tokens = 0
        len_values = 0  # characters processed so far, used by the fallback estimate
        for message in messages:
            # Fixed per-message overhead (presumably the chat framing tokens
            # described at the link in the error message below).
            num_tokens += 4
            for key, value in message.items():
                try:
                    num_tokens += len(encoding.encode(value))
                except Exception:
                    # The tokenizer rejected this value; estimate its size from
                    # the running tokens-per-character ratio. With no history
                    # yet (len_values == 0) the ratio is undefined, so assume
                    # one token per character instead of dividing by zero.
                    if len_values > 0:
                        num_tokens += int(num_tokens / len_values * len(value))
                    else:
                        num_tokens += len(value)
                len_values += len(value)
                if key == "name":
                    # A "name" field costs one token less than the overhead above.
                    num_tokens -= 1
        # Fixed overhead added once for the whole list.
        num_tokens += 2
        return num_tokens
    else:
        raise NotImplementedError(f"""num_tokens_from_messages() is not presently implemented for model {model}.
See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.""")
|
|
|
|
|
def read_qs():
    """Load the pending questions and the already answered question/answer pairs.

    Questions are read line by line (line endings kept) from the three text
    files under ./questions, with "生成摘要" rewritten to "生成中文摘要". When
    ./questions/qas.json exists, its entries with a non-null answer are kept
    and each one removes a single matching question from the pending list.

    Returns:
        A tuple (qs, qas): the questions still to be asked, and the list of
        {"q": ..., "a": ...} dicts restored from qas.json (empty if absent).
    """
    directory = "./questions"
    filenames = [
        'math_question.txt',
        'qa_question.txt',
        'summarization_question.txt',
    ]

    qs = []
    for filename in filenames:
        with open(f"{directory}/{filename}", "r", encoding="utf-8") as f:
            qs.extend(line.replace("生成摘要", "生成中文摘要") for line in f)
    print(f"read {len(qs)} queries from files")

    qas = []
    qas_path = f"{directory}/qas.json"
    if not os.path.exists(qas_path):
        return qs, qas

    with open(qas_path, "r", encoding="utf-8") as f:
        loaded = json.load(f)
    print(f"read {len(loaded)} query-responses from qas.json")
    qas = [{"q": entry["q"], "a": entry["a"]} for entry in loaded if entry["a"] is not None]
    print(f"keep {len(qas)} query-responses from qas.json")

    # Multiset of answered questions: a question that appears k times in
    # qas.json cancels at most k identical pending questions.
    answered = collections.Counter(entry["q"] for entry in qas)
    remained_qs = []
    for question in qs:
        if answered[question] > 0:
            answered[question] -= 1
        else:
            remained_qs.append(question)
    print(f"filter out {len(qs)-len(remained_qs)} with reference to qas.json")

    return remained_qs, qas
|
|
|
# Pending questions and the question/answer pairs restored from disk. `qas`
# is appended to by `askingChatGPT` and read by the UI callbacks.
qs, qas = read_qs()
# Wall-clock start of this run; `display` uses it to estimate the remaining time.
start_time = time.time()
# Number of pairs restored by `read_qs`; everything at qas[num_read_qas:] was
# produced during this run.
num_read_qas = len(qas)
|
|
|
def ask(query, timeout=600):
    """Send one question to gpt-3.5-turbo and return the answer text.

    Args:
        query: The user message to send.
        timeout: Time budget in seconds shared by all attempts.

    Returns:
        The content of the first choice, or None when the query exceeds the
        4096-token limit, the API reports that the messages are too long, or
        the time budget / retry budget (10 counted attempts) runs out.
    """
    quota_error = "You exceeded your current quota, please check your plan and billing details."
    too_long_error = "Please reduce the length of the messages."

    messages = [
        {"role": "user", "content": query}
    ]
    answer = None
    dead_time = time.time() + timeout
    attempt_times = 0
    # Key rotations since the last back-off; used to detect that every key
    # has been tried and rejected for quota.
    switches_since_backoff = 0
    while answer is None and time.time() < dead_time and attempt_times < 10:
        try:
            if num_tokens_from_messages(messages) > 4096:
                return None
            answer = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=messages
            )["choices"][0]["message"]["content"]
        except Exception as e:
            if time.time() >= dead_time:
                # Out of time: rotating keys or sleeping would only overrun
                # the caller's budget.
                break
            print(e)
            error_text = str(e)
            if quota_error in error_text:
                try:
                    idx = openai.api_keys.index(openai.api_key)
                except ValueError:
                    # The active key is not in the list; restart from the first.
                    idx = -1
                openai.api_key = openai.api_keys[(idx + 1) % len(openai.api_keys)]
                print("switch api_key")
                switches_since_backoff += 1
                if switches_since_backoff < len(openai.api_keys):
                    # A fresh key is available: retry at once, not counted
                    # as a failed attempt.
                    continue
                # Every key was rejected in a row: fall through to the
                # back-off below instead of hammering the API in a tight loop.
                switches_since_backoff = 0
            elif too_long_error in error_text:
                return None
            attempt_times += 1
            # Linear back-off, capped so the sleep never outlives the deadline.
            wait_time = min(attempt_times * 10, max(0, int(dead_time - time.time())))
            print(f"retry in {wait_time} seconds...")
            time.sleep(wait_time)
    return answer
|
|
|
|
|
def askingChatGPT(qs, qas, min_interval_seconds=3, max_interval_seconds=15, max_retry_times=3):
    """Ask every question in `qs` in order and append the results to `qas`.

    Args:
        qs: Questions to ask.
        qas: List that receives one {"q": question, "a": answer} dict per
            question; the answer is None when all retries failed.
        min_interval_seconds: Minimum time spent per question; the loop
            sleeps for the remainder when a question finishes faster.
        max_interval_seconds: Lower bound of the per-call timeout and the
            initial value of the elapsed-time history.
        max_retry_times: How many times a question is re-asked while the
            answer is None.
    """
    # `import concurrent` alone does not load the `futures` submodule, so it
    # is imported explicitly here rather than relying on another package
    # having loaded it as a side effect.
    import concurrent.futures

    def ask_(q, timeout):
        """Run `ask` in a worker thread, doubling the timeout after each time-out."""
        while True:
            executor = concurrent.futures.ThreadPoolExecutor()
            future = executor.submit(ask, q, timeout)
            try:
                return future.result(timeout=timeout)
            except concurrent.futures.TimeoutError:
                print(f"ask call timed out after {timeout:.2f} seconds, retrying...")
                timeout = timeout * 2
            finally:
                # Release the executor on every path (the success path used to
                # skip this); wait=False so a stuck worker cannot block us.
                executor.shutdown(wait=False)

    # Sliding window over the durations of the last 10 questions.
    history_elapsed_time = [max_interval_seconds] * 10

    for i, q in enumerate(qs):
        ask_start_time = time.time()

        retry_times = 0
        a = None
        while a is None and retry_times < max_retry_times:
            # Timeout: mean of the 8 shortest recent durations, but never
            # below max_interval_seconds.
            timeout = max(max_interval_seconds, np.mean(sorted(history_elapsed_time)[:8]))
            a = ask_(q, timeout=timeout)
            retry_times += 1

        qas.append({"q": q, "a": a})

        elapsed_time = time.time() - ask_start_time
        history_elapsed_time = history_elapsed_time[1:] + [elapsed_time]
        delay_time = min_interval_seconds - elapsed_time
        if delay_time > 0:
            time.sleep(delay_time)

        print(f"{timestamp2string(time.time())}: iterations: {i+1} / {len(qs)} | elapsed time of this query (s): {elapsed_time:.2f}")

    return
|
|
|
|
|
# Work through the questions on a background thread while the rest of the
# module builds and launches the UI. It is a daemon thread, so it does not
# keep the interpreter alive once the main thread exits.
thread = threading.Thread(target=askingChatGPT, args=(qs, qas), daemon=True)
thread.start()
|
|
|
|
|
import gradio as gr |
|
|
|
|
|
def showcase(access_key):
    """Return chatbot rows showing the 10 most recent question/answer pairs.

    Args:
        access_key: Text entered by the user; must equal the `access_key`
            environment variable.

    Returns:
        A list of (left, right) string tuples for the chatbot: a rejection
        notice when the key is wrong, otherwise a header row followed by one
        row per recent pair with newlines rendered as <br>.
    """
    if access_key != os.getenv('access_key'):
        return [(f"Your entered Access Key:<br>{access_key}<br>is incorrect.", f"So i cannot provide you any information in this private space.")]

    recent_qas = qas[-10:]
    header_row = (f"Your entered Access Key is correct.", f"The latest {len(recent_qas)} query-responses are displayed below.")
    pair_rows = [
        (pair["q"].replace("\n", "<br>"), str(pair["a"]).replace("\n", "<br>"))
        for pair in recent_qas
    ]
    return [header_row] + pair_rows
|
|
|
|
|
def download(access_key):
    """Write the collected question/answer pairs to a JSON file for download.

    Args:
        access_key: Text entered by the user. It must start with the value of
            the `access_key` environment variable. Exactly "<key>: update"
            selects only the pairs produced during this run; any other
            accepted input selects all pairs.

    Returns:
        A tuple (chatbot_ret, file_ret): the chatbot rows describing the
        outcome and a `gr.File.update(...)` that shows the written file, or
        hides the file component when access is denied.
    """
    expected_key = os.getenv('access_key')
    # Deny when the key is not configured (unset would make `startswith`
    # raise, empty would make it match every input) or when it does not match.
    # NOTE(review): any input that merely starts with the key is accepted,
    # kept as-is for backward compatibility.
    if not expected_key or not access_key.startswith(expected_key):
        chatbot_ret = [(f"Your entered Access Key:<br>{access_key}<br>is incorrect.", f"So i cannot provide you any information in this private space.")]
        file_ret = gr.File.update(value=None, visible=False)
        return chatbot_ret, file_ret

    # Snapshot once: the background thread keeps appending to `qas`, and the
    # message, the filename and the file content must all agree.
    snapshot = list(qas)
    if access_key == f"{expected_key}: update":
        records = snapshot[num_read_qas:]
        description = f"The file containing new processed query-responses ({len(records)} in total) can be downloaded below."
        filename = f"qas-{num_read_qas}-{len(snapshot)}.json"
    else:
        records = snapshot
        description = f"The file containing all processed query-responses ({len(records)} in total) can be downloaded below."
        filename = f"qas-{len(snapshot)}.json"

    with open(filename, "w", encoding="utf-8") as f:
        json.dump(records, f, ensure_ascii=False, indent=2)

    chatbot_ret = [("Your entered Access Key is correct.", description)]
    file_ret = gr.File.update(value=filename, visible=True)
    return chatbot_ret, file_ret
|
|
|
|
|
def display(access_key):
    """Return chatbot rows summarising progress, timing and token cost.

    Args:
        access_key: Text entered by the user; must equal the `access_key`
            environment variable.

    Returns:
        A list of (left, right) string tuples: a rejection notice for a wrong
        key, a "just started" notice while no question has been processed in
        this run, otherwise a header row plus five rows of statistics.
    """
    if access_key != os.getenv('access_key'):
        return [(f"Your entered Access Key:<br>{access_key}<br>is incorrect.", f"So i cannot provide you any information in this private space.")]

    if len(qas) - num_read_qas < 1:
        return [(f"Your entered Access Key is correct.", f"But the progress has just started for a while and has no useful progress information to provide.")]

    num_total_qs = len(qs)
    num_processed_qs = len(qas) - num_read_qas

    # Extrapolate the remaining time from the average time per processed question.
    time_takes = time.time() - start_time
    time_remains = time_takes * (num_total_qs - num_processed_qs) / num_processed_qs
    end_time = start_time + time_takes + time_remains

    # Count the tokens of every stored pair; a missing answer counts as "".
    messages = []
    for pair in qas:
        messages.extend((
            {"role": "user", "content": pair["q"]},
            {"role": "assistant", "content": pair["a"] or ""},
        ))
    num_tokens_processed = num_tokens_from_messages(messages)
    # Scale the token count up to the full workload, restored pairs included.
    num_tokens_total = int(num_tokens_processed * (num_total_qs + num_read_qas) / (num_processed_qs + num_read_qas))
    # 0.002 dollars per whole thousand tokens.
    dollars_tokens_processed = 0.002 * int(num_tokens_processed/1000)
    dollars_tokens_total = 0.002 * int(num_tokens_total/1000)

    return [
        (f"Your entered Access Key is correct.", f"The information of progress is displayed below."),
        (f"The number of processed / total queries:", f"{num_processed_qs} / {num_total_qs} (+{num_read_qas})"),
        (f"The hours already takes / est. remains:", f"{time_takes/3600:.2f} / {time_remains/3600:.2f}"),
        (f"The time starts / est. ends:", f"{timestamp2string(start_time)} / {timestamp2string(end_time)}"),
        (f"The number of processed / est. total tokens:", f"{num_tokens_processed} / {num_tokens_total}"),
        (f"The dollars of processed / est. total tokens:", f"{dollars_tokens_processed:.2f} / {dollars_tokens_total:.2f}"),
    ]
|
|
|
|
|
# Build the Gradio page: an explanatory note, a chatbot pane used as the
# output area, an access-key textbox, and three buttons wired to the
# callbacks defined in this file.
# NOTE(review): the nesting of the components below was reconstructed from a
# paste that had lost its indentation - confirm against the deployed layout.
with gr.Blocks() as demo:

    gr.Markdown(
        """
        Hello friends,

        Thanks for your attention on this space. But this space is for my own use, i.e., building a dataset with answers from ChatGPT, and the access key for runtime feedback is only shared to my colleagues.

        If you want to ask ChatGPT on Huggingface just as the title says, you can try this [one](https://huggingface.co/spaces/zhangjf/chatbot) I built for public.
        """
    )

    with gr.Column(variant="panel"):
        # Every callback writes its rows into this chatbot component.
        chatbot = gr.Chatbot()
        # The access key typed here is the single input of all three callbacks.
        txt = gr.Textbox(show_label=False, placeholder="Enter your Access Key to access this private space").style(container=False)
        with gr.Row():
            button_showcase = gr.Button("Show Recent Query-Responses")
            button_download = gr.Button("Download All Query-Responses")
            button_display = gr.Button("Display Progress Infomation")

    # Created hidden; `download` returns an update that makes it visible.
    downloadfile = gr.File(None, interactive=False, show_label=False, visible=False)

    button_showcase.click(fn=showcase, inputs=[txt], outputs=[chatbot])
    button_download.click(fn=download, inputs=[txt], outputs=[chatbot, downloadfile])
    button_display.click(fn=display, inputs=[txt], outputs=[chatbot])

# Start the web server for the page built above.
demo.launch()