Spaces:

zhangjf
/

ask_ChatGPT_on_HuggingFace

Paused

App Files Files Community

ask_ChatGPT_on_HuggingFace / app.py

zhangjf

ignore queries too long

2ee4138 over 1 year ago

raw

history blame

11.2 kB

	import collections
	import concurrent
	import concurrent.futures
	import datetime
	import html
	import json
	import os
	import threading
	import time

	import numpy as np
	import openai
	import pytz
	import tiktoken

	# The API_KEYs environment variable holds one OpenAI API key per line.
	# Blank lines and surrounding whitespace (e.g. a trailing newline or "\r")
	# are dropped so that key rotation never selects an empty or malformed key.
	_raw_api_keys = os.getenv('API_KEYs')
	if _raw_api_keys is None:
	    raise RuntimeError("The API_KEYs environment variable is not set.")
	openai.api_keys = [key.strip() for key in _raw_api_keys.splitlines() if key.strip()]
	if not openai.api_keys:
	    raise RuntimeError("The API_KEYs environment variable contains no API key.")
	openai.api_key = openai.api_keys[0]

	timezone = pytz.timezone('Asia/Shanghai')


	def timestamp2string(timestamp):
	    """Format a POSIX timestamp as 'YYYY-MM-DD HH:MM:SS' in the Asia/Shanghai timezone."""
	    local_time = datetime.datetime.fromtimestamp(timestamp).astimezone(timezone)
	    return local_time.strftime('%Y-%m-%d %H:%M:%S')

	def num_tokens_from_messages(messages, model="gpt-3.5-turbo"):
	    """Return the number of tokens used by a list of chat messages.

	    Args:
	        messages: iterable of dicts whose values are strings
	            (e.g. {"role": ..., "content": ...}).
	        model: model name used to pick the tiktoken encoding. Only
	            "gpt-3.5-turbo" is supported.

	    Returns:
	        The token count as an int, including 4 overhead tokens per message
	        and 2 tokens for the primed assistant reply. Values that the
	        encoder rejects are counted by a rough linear estimate.

	    Raises:
	        NotImplementedError: if ``model`` is not "gpt-3.5-turbo".
	    """
	    if model != "gpt-3.5-turbo":  # note: future models may deviate from this
	        raise NotImplementedError(
	            f"num_tokens_from_messages() is not presently implemented for model {model}.\n"
	            "See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens."
	        )

	    try:
	        encoding = tiktoken.encoding_for_model(model)
	    except KeyError:
	        encoding = tiktoken.get_encoding("cl100k_base")

	    num_tokens = 0
	    len_values = 0  # characters seen so far; basis of the fallback estimate
	    for message in messages:
	        num_tokens += 4  # every message follows <im_start>{role/name}\n{content}<im_end>\n
	        for key, value in message.items():
	            try:
	                num_tokens += len(encoding.encode(value))
	            except Exception:
	                # Deliberately broad: counting is best-effort, so a value the
	                # encoder rejects is estimated instead of aborting the count.
	                if len_values > 0:
	                    # linear estimation from the tokens-per-character ratio so far
	                    num_tokens += int(num_tokens / len_values * len(value))
	                else:
	                    # nothing seen yet to derive a ratio from (this used to
	                    # divide by zero); fall back to one token per character
	                    num_tokens += len(value)
	            len_values += len(value)
	            if key == "name":  # if there's a name, the role is omitted
	                num_tokens -= 1  # role is always required and always 1 token
	    num_tokens += 2  # every reply is primed with <im_start>assistant
	    return num_tokens


	def read_qs():
	    """Load the queries still to be asked and the query-responses already stored.

	    Every line of the three question files under ./questions becomes one
	    query (with "生成摘要" rewritten to "生成中文摘要"). When
	    ./questions/qas.json exists, its entries with a non-None answer are
	    kept, and each kept entry cancels one identical query from the list.

	    Returns:
	        (qs, qas): the remaining query strings and the kept
	        {"q": ..., "a": ...} dicts.
	    """
	    directory = "./questions"
	    filenames = [
	        'math_question.txt',
	        'qa_question.txt',
	        'summarization_question.txt',
	    ]

	    qs = []
	    for filename in filenames:
	        with open(f"{directory}/{filename}", "r", encoding="utf-8") as f:
	            qs.extend(line.replace("生成摘要","生成中文摘要") for line in f)
	    print(f"read {len(qs)} queries from files")

	    qas_path = f"{directory}/qas.json"
	    if not os.path.exists(qas_path):
	        return qs, []

	    with open(qas_path, "r", encoding="utf-8") as f:
	        stored = json.load(f)
	    print(f"read {len(stored)} query-responses from qas.json")
	    qas = [{"q": entry["q"], "a": entry["a"]} for entry in stored if entry["a"] is not None]
	    print(f"keep {len(qas)} query-responses from qas.json")

	    # Multiset of answered queries: each stored answer removes one occurrence.
	    answered = collections.Counter(entry["q"] for entry in qas)
	    pending = []
	    for query in qs:
	        if answered[query] > 0:
	            answered[query] -= 1
	            continue
	        pending.append(query)
	    print(f"filter out {len(qs)-len(pending)} with reference to qas.json")

	    return pending, qas

	# Load the pending queries and the stored query-responses once, at import time.
	qs, qas = read_qs()
	# Wall-clock reference that display() uses for its elapsed/remaining estimates.
	start_time = time.time()
	# Number of query-responses loaded from disk; entries appended to qas later
	# in this run sit at index num_read_qas and beyond.
	num_read_qas = len(qas)

	def ask(query, timeout=600):
	    """Send one query to gpt-3.5-turbo and return the reply text.

	    Args:
	        query: the user message to send.
	        timeout: overall budget in seconds; no new attempt is started once
	            it has elapsed.

	    Returns:
	        The reply content, or None when the query is longer than 4096
	        tokens, the API asks to reduce the message length, 10 failed
	        attempts were made, or the time budget ran out.
	    """
	    messages = [
	        {"role": "user", "content": query}
	    ]
	    dead_time = time.time() + timeout
	    attempt_times = 0
	    key_switches = 0  # quota errors since the last backoff
	    answer = None
	    while answer is None and time.time() < dead_time and attempt_times < 10:
	        try:
	            # Kept inside the try so that a failure while counting tokens is
	            # handled like any other failed attempt instead of escaping.
	            if num_tokens_from_messages(messages) > 4096:
	                return None
	            answer = openai.ChatCompletion.create(
	                model="gpt-3.5-turbo",
	                messages=messages,
	            )["choices"][0]["message"]["content"]
	        except Exception as e:
	            if time.time() >= dead_time:
	                break
	            print(e)
	            error_text = str(e)
	            if "Please reduce the length of the messages." in error_text:
	                return None
	            if "You exceeded your current quota, please check your plan and billing details." in error_text:
	                idx = openai.api_keys.index(openai.api_key)
	                idx = (idx + 1) % len(openai.api_keys)
	                openai.api_key = openai.api_keys[idx]
	                print(f"switch api_key")
	                key_switches += 1
	                if key_switches < len(openai.api_keys):
	                    # A key switch is not counted as an attempt: retry at once.
	                    continue
	                # Every key has been tried in a row and all are out of quota:
	                # fall through to the backoff below instead of hammering the API.
	                key_switches = 0
	            attempt_times += 1
	            # Never sleep past the deadline; announce the wait before sleeping.
	            remaining = max(0, int(dead_time - time.time()))
	            wait_time = min(attempt_times * 10, remaining)
	            print(f"retry in {wait_time} seconds...")
	            time.sleep(wait_time)
	    return answer


	def _ask_with_timeout(q, timeout):
	    """Run ask(q, timeout) in a worker thread, doubling the timeout after each timed-out call."""
	    while True:
	        executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
	        try:
	            future = executor.submit(ask, q, timeout)
	            return future.result(timeout=timeout)
	        except concurrent.futures.TimeoutError:
	            print(f"ask call timed out after {timeout:.2f} seconds, retrying...")
	            timeout *= 2
	        finally:
	            # Release the executor on every path without waiting for a call
	            # that may still be running in its worker thread.
	            executor.shutdown(wait=False)


	def askingChatGPT(qs, qas, min_interval_seconds=3, max_interval_seconds=15, max_retry_times=3):
	    """Ask every query in ``qs`` and append the results to ``qas`` in place.

	    Args:
	        qs: sequence of query strings.
	        qas: list that receives one {"q": query, "a": answer} dict per
	            query; the answer is None when every attempt failed.
	        min_interval_seconds: minimum duration of one iteration; shorter
	            iterations are padded with a sleep.
	        max_interval_seconds: lower bound of the per-call timeout and the
	            initial value of the elapsed-time history.
	        max_retry_times: maximum number of attempts per query while the
	            answer is None.
	    """
	    # Sliding window over the elapsed times of the last 10 queries.
	    history_elapsed_time = collections.deque([max_interval_seconds] * 10, maxlen=10)

	    for i, q in enumerate(qs):
	        ask_start_time = time.time()

	        # Timeout: mean of the 8 smallest recent elapsed times, but never
	        # below max_interval_seconds.
	        timeout = max(max_interval_seconds, np.mean(sorted(history_elapsed_time)[:8]))
	        a = None
	        for _ in range(max_retry_times):
	            a = _ask_with_timeout(q, timeout)
	            if a is not None:
	                break

	        qas.append({"q": q, "a": a})

	        elapsed_time = time.time() - ask_start_time
	        history_elapsed_time.append(elapsed_time)
	        delay_time = min_interval_seconds - elapsed_time
	        if delay_time > 0:
	            time.sleep(delay_time)

	        print(f"{timestamp2string(time.time())}: iterations: {i+1} / {len(qs)} | elapsed time of this query (s): {elapsed_time:.2f}")


	# Process the queries in a background daemon thread so that the rest of the
	# module keeps executing while the queries are being asked.
	thread = threading.Thread(target=askingChatGPT, args=(qs, qas), daemon=True)
	thread.start()


	import gradio as gr


	def showcase(access_key):
	    """Return chatbot rows showing the ten most recent query-responses.

	    Args:
	        access_key: text entered by the visitor; it must equal the
	            ``access_key`` environment variable.

	    Returns:
	        A list of (left, right) message tuples: a single rejection row when
	        the key is wrong, otherwise a header row followed by one row per
	        recent query-response with newlines turned into "<br>".
	    """
	    if access_key != os.getenv('access_key'):
	        # The rejected key is echoed next to "<br>" markup, so the output is
	        # presumably rendered as HTML: escape the visitor-supplied text.
	        return [(
	            f"Your entered Access Key:<br>{html.escape(str(access_key))}<br>is incorrect.",
	            f"So i cannot provide you any information in this private space.",
	        )]

	    recent_qas = qas[-10:]
	    chatbot_ret = [(f"Your entered Access Key is correct.", f"The latest {len(recent_qas)} query-responses are displayed below.")]
	    chatbot_ret += [
	        (qa["q"].replace("\n","<br>"), str(qa["a"]).replace("\n","<br>"))
	        for qa in recent_qas
	    ]
	    return chatbot_ret


	def download(access_key):
	    """Write the collected query-responses to a JSON file and offer it for download.

	    Args:
	        access_key: text entered by the visitor. It must start with the
	            ``access_key`` environment variable. The exact text
	            "<key>: update" selects only the query-responses produced in
	            this run; any other accepted text selects all of them.
	            NOTE(review): any suffix after the key is accepted here, unlike
	            showcase() and display(), which need an exact match — confirm
	            that this is intended.

	    Returns:
	        (chatbot_ret, file_ret): the chatbot rows and the update for the
	        file component (hidden when the key is rejected).
	    """
	    expected_key = os.getenv('access_key')
	    # An unset key used to raise TypeError in startswith(), and an empty key
	    # would match every input; neither may grant access.
	    if not expected_key or not access_key.startswith(expected_key):
	        # Escape the echoed text: it is visitor-supplied and sits next to "<br>" markup.
	        chatbot_ret = [(
	            f"Your entered Access Key:<br>{html.escape(str(access_key))}<br>is incorrect.",
	            f"So i cannot provide you any information in this private space.",
	        )]
	        return chatbot_ret, gr.File.update(value=None, visible=False)

	    # Copy once so that the reported count, the filename and the file content
	    # agree even if another thread appends to qas in the meantime.
	    snapshot = list(qas)
	    if access_key == f"{expected_key}: update":
	        records = snapshot[num_read_qas:]
	        filename = f"qas-{num_read_qas}-{len(snapshot)}.json"
	        reply = f"The file containing new processed query-responses ({len(snapshot)-num_read_qas} in total) can be downloaded below."
	    else:
	        records = snapshot
	        filename = f"qas-{len(snapshot)}.json"
	        reply = f"The file containing all processed query-responses ({len(snapshot)} in total) can be downloaded below."

	    with open(filename, "w", encoding="utf-8") as f:
	        f.write(json.dumps(records, ensure_ascii=False, indent=2))
	    chatbot_ret = [(f"Your entered Access Key is correct.", reply)]
	    return chatbot_ret, gr.File.update(value=filename, visible=True)


	def display(access_key):
	    """Return chatbot rows summarising progress, timing, token count and cost.

	    Args:
	        access_key: text entered by the visitor; it must equal the
	            ``access_key`` environment variable.

	    Returns:
	        A list of (left, right) message tuples: a single rejection row when
	        the key is wrong, a single notice when no query has been processed
	        in this run yet, otherwise a header row followed by the progress rows.
	    """
	    if access_key != os.getenv('access_key'):
	        # Escape the echoed text: it is visitor-supplied and sits next to "<br>" markup.
	        return [(
	            f"Your entered Access Key:<br>{html.escape(str(access_key))}<br>is incorrect.",
	            f"So i cannot provide you any information in this private space.",
	        )]

	    # Copy once so that all figures below describe the same state even if
	    # another thread appends to qas in the meantime.
	    snapshot = list(qas)
	    num_processed_qs = len(snapshot) - num_read_qas
	    if num_processed_qs < 1:
	        return [(f"Your entered Access Key is correct.", f"But the progress has just started for a while and has no useful progress information to provide.")]

	    num_total_qs = len(qs)
	    time_takes = time.time() - start_time
	    # Linear extrapolation from the average time per processed query.
	    time_remains = time_takes * (num_total_qs-num_processed_qs) / num_processed_qs
	    end_time = start_time + time_takes + time_remains

	    messages = []
	    for qa in snapshot:
	        messages.append({"role":"user", "content":qa["q"]})
	        messages.append({"role":"assistant", "content":qa["a"] or ""})
	    num_tokens_processed = num_tokens_from_messages(messages)
	    num_tokens_total = int(num_tokens_processed * (num_total_qs+num_read_qas) / (num_processed_qs+num_read_qas))
	    # 0.002 dollars per 1000 tokens, prorated; the count is no longer
	    # truncated to whole thousands before pricing.
	    dollars_tokens_processed = 0.002 * num_tokens_processed / 1000
	    dollars_tokens_total = 0.002 * num_tokens_total / 1000

	    return [
	        (f"Your entered Access Key is correct.", f"The information of progress is displayed below."),
	        (f"The number of processed / total queries:", f"{num_processed_qs} / {num_total_qs} (+{num_read_qas})"),
	        (f"The hours already takes / est. remains:", f"{time_takes/3600:.2f} / {time_remains/3600:.2f}"),
	        (f"The time starts / est. ends:", f"{timestamp2string(start_time)} / {timestamp2string(end_time)}"),
	        (f"The number of processed / est. total tokens:", f"{num_tokens_processed} / {num_tokens_total}"),
	        (f"The dollars of processed / est. total tokens:", f"{dollars_tokens_processed:.2f} / {dollars_tokens_total:.2f}"),
	    ]


	# Gradio page: a notice, a chatbot pane, an access-key text box, three
	# buttons and a hidden file widget.
	# NOTE(review): the nesting of the layout statements below cannot be read
	# from this listing (indentation was lost) — restore it from the original file.
	with gr.Blocks() as demo:

	# Notice telling visitors that this space is private.
	gr.Markdown(
	"""
	Hello friends,

	Thanks for your attention on this space. But this space is for my own use, i.e., building a dataset with answers from ChatGPT, and the access key for runtime feedback is only shared to my colleagues.

	If you want to ask ChatGPT on Huggingface just as the title says, you can try this [one](https://huggingface.co/spaces/zhangjf/chatbot) I built for public.
	"""
	)

	with gr.Column(variant="panel"):
	# Output pane that receives the rows returned by the three handlers.
	chatbot = gr.Chatbot()
	# The text box content is passed to every handler as access_key.
	txt = gr.Textbox(show_label=False, placeholder="Enter your Access Key to access this private space").style(container=False)
	with gr.Row():
	button_showcase = gr.Button("Show Recent Query-Responses")
	button_download = gr.Button("Download All Query-Responses")
	button_display = gr.Button("Display Progress Infomation")

	# Created hidden; download() returns an update that sets its value and visibility.
	downloadfile = gr.File(None, interactive=False, show_label=False, visible=False)

	# Wire each button to its handler; download also updates the file widget.
	button_showcase.click(fn=showcase, inputs=[txt], outputs=[chatbot])
	button_download.click(fn=download, inputs=[txt], outputs=[chatbot, downloadfile])
	button_display.click(fn=display, inputs=[txt], outputs=[chatbot])

	# Launch the Gradio app.
	demo.launch()