Spaces:

allenai
/

BaseChat

Running

App Files Files Community

BaseChat / app.py

yuchenlin

add yi 6B

a19b85a 5 months ago

raw

history blame

7.16 kB

	import gradio as gr
	from openai import OpenAI
	import os
	from typing import List
	import logging

	# Emit INFO-level records (request parameters and prompts) to the console.
	logging.basicConfig(level=logging.INFO)


	# Endpoint handed to the OpenAI client as ``base_url``.
	BASE_URL = "https://api.together.xyz/v1"
	# Key used whenever the caller does not provide one of their own.
	DEFAULT_API_KEY = os.getenv("TOGETHER_API_KEY")
	import urllib.request
	URIAL_VERSION = "inst_1k_v4.help"

	urial_url = f"https://raw.githubusercontent.com/Re-Align/URIAL/main/urial_prompts/{URIAL_VERSION}.txt"
	# Download the prompt once at import time. The context manager closes the
	# HTTP response deterministically, and the timeout (seconds) keeps a stalled
	# connection from hanging application start-up forever.
	with urllib.request.urlopen(urial_url, timeout=30) as _urial_response:
	    urial_prompt = _urial_response.read().decode('utf-8')
	# The downloaded prompt fences text with backticks; the template and stop
	# strings in this file use triple double-quotes instead.
	urial_prompt = urial_prompt.replace("```", '"""')
	# Sequences that mark the end of the model's answer.
	stop_str = ['"""', '# Query:', '# Answer:']

	def urial_template(urial_prompt, history, message):
	    """Assemble the completion prompt for the next model turn.

	    The result is ``urial_prompt`` plus a newline, then one fenced
	    "# Query:" / "# Answer:" pair per ``(user_msg, ai_msg)`` item of
	    ``history``, then a final "# Query:" section holding ``message``
	    followed by an "# Answer:" section whose fence is left open for the
	    model to complete.
	    """
	    past_turns = [
	        f'# Query:\n"""\n{user_msg}\n"""\n\n# Answer:\n"""\n{ai_msg}\n"""\n\n'
	        for user_msg, ai_msg in history
	    ]
	    open_turn = f'# Query:\n"""\n{message}\n"""\n\n# Answer:\n"""\n'
	    return "".join([urial_prompt, "\n", *past_turns, open_turn])




	def openai_base_request(
	    model: str=None,
	    temperature: float=0,
	    max_tokens: int=512,
	    top_p: float=1.0,
	    prompt: str=None,
	    n: int=1,
	    repetition_penalty: float=1.0,
	    stop: List[str]=None,
	    api_key: str=None,
	):
	    """Start a streaming text-completion request and return the stream.

	    Args:
	        model: Model identifier forwarded to the API.
	        temperature: Sampling temperature; converted with ``float()``.
	        max_tokens: Generation limit; converted with ``int()``.
	        top_p: Nucleus sampling value; converted with ``float()``.
	        prompt: Full prompt text to complete.
	        n: Number of completions requested.
	        repetition_penalty: Sent through ``extra_body``; converted with
	            ``float()``.
	        stop: Stop sequences forwarded to the API.
	        api_key: Key for this request; ``DEFAULT_API_KEY`` when ``None``.

	    Returns:
	        Whatever ``client.completions.create(..., stream=True)`` returns.
	    """
	    effective_key = DEFAULT_API_KEY if api_key is None else api_key
	    client = OpenAI(api_key=effective_key, base_url=BASE_URL)

	    # Record every request parameter (including the full prompt) at INFO level.
	    for record in (
	        f"Requesting chat completion from OpenAI API with model {model}",
	        f"Prompt: {prompt}",
	        f"Temperature: {temperature}",
	        f"Max tokens: {max_tokens}",
	        f"Top-p: {top_p}",
	        f"Repetition penalty: {repetition_penalty}",
	        f"Stop: {stop}",
	    ):
	        logging.info(record)

	    # The numeric settings may not already be numbers, hence the explicit casts.
	    return client.completions.create(
	        model=model,
	        prompt=prompt,
	        temperature=float(temperature),
	        max_tokens=int(max_tokens),
	        top_p=float(top_p),
	        n=n,
	        extra_body={'repetition_penalty': float(repetition_penalty)},
	        stop=stop,
	        stream=True,
	    )




	# Maps the model label shown in the UI to the identifier sent to the API.
	_MODEL_IDS = {
	    "Llama-3-8B": "meta-llama/Llama-3-8b-hf",
	    "Llama-3-70B": "meta-llama/Llama-3-70b-hf",
	    "Llama-2-7B": "meta-llama/Llama-2-7b-hf",
	    "Llama-2-70B": "meta-llama/Llama-2-70b-hf",
	    "Mistral-7B-v0.1": "mistralai/Mistral-7B-v0.1",
	    "Mixtral-8x22B": "mistralai/Mixtral-8x22B",
	    "Qwen1.5-72B": "Qwen/Qwen1.5-72B",
	    "Yi-34B": "zero-one-ai/Yi-34B",
	    "Yi-6B": "zero-one-ai/Yi-6B",
	    "OLMO": "allenai/OLMo-7B",
	}


	def _hide_partial_fence(text):
	    # Return text without a trailing newline-plus-one-or-two-quotes run, which
	    # may be the beginning of the closing fence and should not be displayed yet.
	    for tail in ('\n""', '\n"'):
	        if text.endswith(tail):
	            return text[:len(text) - (len(tail) - 1)]
	    return text


	def respond(
	    message,
	    history: list[tuple[str, str]],
	    max_tokens,
	    temperature,
	    top_p,
	    rp,
	    model_name,
	    together_api_key
	):
	    """Stream the model's answer to ``message`` as growing partial text.

	    Args:
	        message: The new user message.
	        history: Earlier ``(user_msg, ai_msg)`` pairs, oldest first.
	        max_tokens: Generation limit forwarded to the request.
	        temperature: Sampling temperature forwarded to the request.
	        top_p: Nucleus sampling value forwarded to the request.
	        rp: Repetition penalty from the caller (currently overridden, see
	            the note in the body).
	        model_name: One of the labels in ``_MODEL_IDS``.
	        together_api_key: Used only when it is exactly 64 characters long;
	            otherwise ``DEFAULT_API_KEY`` is used.

	    Yields:
	        The answer accumulated so far, after every streamed chunk.

	    Raises:
	        ValueError: If ``model_name`` is not a known label.
	    """
	    # NOTE(review): the caller's repetition penalty is discarded and 1.0 is
	    # always sent. Kept as-is because changing it alters model output;
	    # confirm whether the override is intentional.
	    rp = 1.0
	    prompt = urial_template(urial_prompt, history, message)
	    try:
	        _model_name = _MODEL_IDS[model_name]
	    except KeyError:
	        raise ValueError("Invalid model name") from None

	    if together_api_key and len(together_api_key) == 64:
	        api_key = together_api_key
	    else:
	        api_key = DEFAULT_API_KEY

	    request = openai_base_request(prompt=prompt, model=_model_name,
	                                  temperature=temperature,
	                                  max_tokens=max_tokens,
	                                  top_p=top_p,
	                                  repetition_penalty=rp,
	                                  stop=stop_str, api_key=api_key)

	    # ``response`` holds the complete, untrimmed text received so far. Only
	    # the yielded copy has a possible partial fence hidden; trimming the
	    # buffer itself would permanently drop a legitimate quote that starts a
	    # line and would stop a fence split across chunks from being detected.
	    response = ""
	    for msg in request:
	        # A chunk without text is treated as an empty string.
	        token = msg.choices[0].delta["content"] or ""
	        candidate = response + token
	        stop_positions = [
	            pos for pos in (candidate.find(_stop) for _stop in stop_str)
	            if pos != -1
	        ]
	        if stop_positions:
	            # Keep whatever precedes the earliest stop string, including
	            # text that arrived in the same chunk as the stop string.
	            response = candidate[:min(stop_positions)]
	            yield _hide_partial_fence(response)
	            break
	        response = candidate
	        yield _hide_partial_fence(response)

	# JavaScript source passed to gr.Blocks through its ``js`` argument. It looks
	# up the element with id 'api_key' (the elem_id given to the Together API key
	# textbox) and, when that element contains a span with
	# data-testid="block-info", appends a line break, a link to the Together
	# API-keys page and a short note to the element.
	# NOTE(review): if no element with id 'api_key' exists, ``apiKeyDiv`` is null
	# and the ``querySelector`` call throws before the error branch can log.
	# NOTE(review): the nodes are appended to the 'api_key' element itself, not to
	# the span, despite what the inline JS comment says.
	js_code_label = """
	function addApiKeyLink() {
	// Select the div with id 'api_key'
	const apiKeyDiv = document.getElementById('api_key');

	// Find the span within that div with data-testid 'block-info'
	const blockInfoSpan = apiKeyDiv.querySelector('span[data-testid="block-info"]');

	// Create the new link element
	const newLink = document.createElement('a');
	newLink.href = 'https://api.together.ai/settings/api-keys';
	newLink.textContent = ' View your keys here.';
	newLink.target = '_blank'; // Open link in new tab
	newLink.style = 'color: #007bff; text-decoration: underline;';

	// Create the additional text
	const additionalText = document.createTextNode(' (new account will have free credits to use.)');

	// Append the link and additional text to the span
	if (blockInfoSpan) {
	// add a br
	apiKeyDiv.appendChild(document.createElement('br'));
	apiKeyDiv.appendChild(newLink);
	apiKeyDiv.appendChild(additionalText);
	} else {
	console.error('Span with data-testid "block-info" not found');
	}
	}
	"""
	# Page layout: a header with the model picker, the Together API key box, the
	# four sampling controls, and finally the chat widget driven by ``respond``.
	# NOTE(review): the nesting of the ``with`` blocks below cannot be read off
	# this flattened text; confirm it against the original file before editing.
	# NOTE(review): the Markdown string contains ``\|`` in a non-raw literal, which
	# is an invalid escape sequence that newer Python versions warn about;
	# ``\\|`` would produce the same text without the warning.
	with gr.Blocks(gr.themes.Soft(), js=js_code_label) as demo:
	with gr.Row():
	with gr.Column():
	gr.Markdown("""# 💬 BaseChat: Chat with Base LLMs with URIAL
	[Paper](https://arxiv.org/abs/2312.01552) \| [Website](https://allenai.github.io/re-align/) \| [GitHub](https://github.com/Re-Align/urial) \| Contact: [Yuchen Lin](https://yuchenlin.xyz/)

	Talk with __BASE__ LLMs which are not fine-tuned at all.
	""")
	# The labels offered here must be ones that ``respond`` can map to a model id.
	model_name = gr.Radio(["Llama-3-8B", "Llama-3-70B", "Mistral-7B-v0.1",
	"Mixtral-8x22B", "Yi-6B", "Yi-34B", "Llama-2-7B", "Llama-2-70B", "OLMO"]
	, value="Llama-3-8B", label="Base LLM name")
	with gr.Column():
	# elem_id="api_key" is the id that the script in js_code_label looks up.
	together_api_key = gr.Textbox(label="🔑 Together APIKey", placeholder="Enter your Together API Key. Leave it blank if you want to use the default API key.", type="password", elem_id="api_key")
	with gr.Column():
	with gr.Row():
	# The numeric settings are plain textboxes; presumably their values arrive
	# as text, and openai_base_request converts them with int()/float().
	max_tokens = gr.Textbox(value=256, label="Max tokens")
	temperature = gr.Textbox(value=0.5, label="Temperature")
	# with gr.Column():
	# with gr.Row():
	top_p = gr.Textbox(value=0.9, label="Top-p")
	# NOTE(review): respond() currently overwrites this value with 1.0.
	rp = gr.Textbox(value=1.1, label="Repetition penalty")


	# The additional inputs are listed in the same order as the parameters that
	# follow (message, history) in respond's signature.
	chat = gr.ChatInterface(
	respond,
	additional_inputs=[max_tokens, temperature, top_p, rp, model_name, together_api_key],
	# additional_inputs_accordion="⚙️ Parameters",
	# fill_height=True,
	)
	chat.chatbot.height = 550




	# Launch the Gradio app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()