"""BaseChat: chat with *base* (non-instruction-tuned) LLMs via URIAL prompting.

The app downloads the URIAL in-context alignment prompt, renders the chat
history into URIAL's Query/Answer format, and streams completions from
Together AI's OpenAI-compatible endpoint through a Gradio ChatInterface.
"""
import logging
import os
import urllib.request
from typing import List

import gradio as gr
from openai import OpenAI

# Log to console so request parameters are visible while serving.
logging.basicConfig(level=logging.INFO)

BASE_URL = "https://api.together.xyz/v1"
DEFAULT_API_KEY = os.getenv("TOGETHER_API_KEY")

URIAL_VERSION = "inst_1k_v4.help"
URIAL_URL = f"https://raw.githubusercontent.com/Re-Align/URIAL/main/urial_prompts/{URIAL_VERSION}.txt"

# Sequences that terminate a model answer in the URIAL prompt format.
STOP_STRS = ['"""', '# Query:', '# Answer:']

# UI display name -> Together model identifier.
MODEL_MAPPING = {
    "Llama-3-8B": "meta-llama/Llama-3-8b-hf",
    "Llama-3-70B": "meta-llama/Llama-3-70b-hf",
    "Llama-2-7B": "meta-llama/Llama-2-7b-hf",
    "Llama-2-70B": "meta-llama/Llama-2-70b-hf",
    "Mistral-7B-v0.1": "mistralai/Mistral-7B-v0.1",
    "Mixtral-8x22B": "mistralai/Mixtral-8x22B",
    "Qwen1.5-72B": "Qwen/Qwen1.5-72B",
    "Yi-34B": "zero-one-ai/Yi-34B",
    "Yi-6B": "zero-one-ai/Yi-6B",
    "OLMO": "allenai/OLMo-7B",
}


def _load_urial_prompt(url: str = URIAL_URL) -> str:
    """Fetch the URIAL prompt text and normalize its fences to triple quotes."""
    text = urllib.request.urlopen(url).read().decode("utf-8")
    # New versions of URIAL use """ instead of ``` as the answer fence.
    return text.replace("```", '"""')


# Fetched once at startup; an unreachable URL raises here and aborts launch.
urial_prompt = _load_urial_prompt()


def urial_template(urial_prompt: str, history, message: str) -> str:
    """Build the full URIAL prompt: prefix + past turns + the new query.

    Each past (user, assistant) turn is rendered as a Query/Answer pair
    fenced with triple quotes; the prompt ends with an open Answer fence
    for the base model to continue.
    """
    parts = [urial_prompt, "\n"]
    for user_msg, ai_msg in history:
        parts.append(f'# Query:\n"""\n{user_msg}\n"""\n\n# Answer:\n"""\n{ai_msg}\n"""\n\n')
    parts.append(f'# Query:\n"""\n{message}\n"""\n\n# Answer:\n"""\n')
    return "".join(parts)


def openai_base_request(
    model: str = None,
    temperature: float = 0,
    max_tokens: int = 512,
    top_p: float = 1.0,
    prompt: str = None,
    n: int = 1,
    repetition_penalty: float = 1.0,
    stop: List[str] = None,
    api_key: str = None,
):
    """Create a streaming text-completion request against the Together endpoint.

    Returns the streaming iterator produced by ``client.completions.create``.
    Falls back to ``DEFAULT_API_KEY`` when *api_key* is not provided.
    """
    if api_key is None:
        api_key = DEFAULT_API_KEY
    client = OpenAI(api_key=api_key, base_url=BASE_URL)

    # Lazy %-style args keep formatting cost off the hot path when INFO is off.
    logging.info("Requesting chat completion from OpenAI API with model %s", model)
    logging.info("Prompt: %s", prompt)
    logging.info("Temperature: %s", temperature)
    logging.info("Max tokens: %s", max_tokens)
    logging.info("Top-p: %s", top_p)
    logging.info("Repetition penalty: %s", repetition_penalty)
    logging.info("Stop: %s", stop)

    return client.completions.create(
        model=model,
        prompt=prompt,
        temperature=float(temperature),
        max_tokens=int(max_tokens),
        top_p=float(top_p),
        n=n,
        # Together-specific extension; not part of the core OpenAI schema.
        extra_body={"repetition_penalty": float(repetition_penalty)},
        stop=stop,
        stream=True,
    )


def respond(
    message,
    history: list[tuple[str, str]],
    max_tokens,
    temperature,
    top_p,
    rp,
    model_name,
    together_api_key,
):
    """Gradio ChatInterface callback: yield the streamed answer incrementally.

    Builds the URIAL prompt from *history* and *message*, resolves the model
    display name, and streams tokens until a stop string would appear.
    Raises ValueError for an unknown *model_name*.
    """
    # NOTE(review): the repetition penalty chosen in the UI is overridden
    # here — confirm whether this pin to 1.0 is intentional or a leftover.
    rp = 1.0
    prompt = urial_template(urial_prompt, history, message)
    try:
        _model_name = MODEL_MAPPING[model_name]
    except KeyError:
        raise ValueError("Invalid model name") from None

    # Together API keys are 64 characters; anything else uses the default key.
    if together_api_key and len(together_api_key) == 64:
        api_key = together_api_key
    else:
        api_key = DEFAULT_API_KEY

    request = openai_base_request(
        prompt=prompt,
        model=_model_name,
        temperature=temperature,
        max_tokens=max_tokens,
        top_p=top_p,
        repetition_penalty=rp,
        stop=STOP_STRS,
        api_key=api_key,
    )

    response = ""
    for msg in request:
        # Provider-specific chunk shape: content arrives under choices[0].delta
        # — TODO confirm against Together's streaming completion payload.
        token = msg.choices[0].delta["content"]
        # Stop as soon as any stop string would appear in the accumulated text.
        if any(stop_str in response + token for stop_str in STOP_STRS):
            break
        response += token
        # Trim a partially streamed closing fence (the leading part of """).
        if response.endswith('\n"'):
            response = response[:-1]
        elif response.endswith('\n""'):
            response = response[:-2]
        yield response


# Injected into the page to append a "get your API key" link under the
# API-key textbox (elem_id="api_key").
js_code_label = """
function addApiKeyLink() {
    // Select the div with id 'api_key'
    const apiKeyDiv = document.getElementById('api_key');
    // Find the span within that div with data-testid 'block-info'
    const blockInfoSpan = apiKeyDiv.querySelector('span[data-testid="block-info"]');
    // Create the new link element
    const newLink = document.createElement('a');
    newLink.href = 'https://api.together.ai/settings/api-keys';
    newLink.textContent = ' View your keys here.';
    newLink.target = '_blank'; // Open link in new tab
    newLink.style = 'color: #007bff; text-decoration: underline;';
    // Create the additional text
    const additionalText = document.createTextNode(' (new account will have free credits to use.)');
    // Append the link and additional text to the span
    if (blockInfoSpan) {
        // add a br
        apiKeyDiv.appendChild(document.createElement('br'));
        apiKeyDiv.appendChild(newLink);
        apiKeyDiv.appendChild(additionalText);
    } else {
        console.error('Span with data-testid "block-info" not found');
    }
}
"""

with gr.Blocks(gr.themes.Soft(), js=js_code_label) as demo:
    with gr.Row():
        with gr.Column():
            gr.Markdown("""# 💬 BaseChat: Chat with Base LLMs with URIAL
[Paper](https://arxiv.org/abs/2312.01552) | [Website](https://allenai.github.io/re-align/) | [GitHub](https://github.com/Re-Align/urial) | Contact: [Yuchen Lin](https://yuchenlin.xyz/)

**Talk with __BASE__ LLMs which are not fine-tuned at all.**
""")
            model_name = gr.Radio(
                ["Llama-3-8B", "Llama-3-70B",
                 "Mistral-7B-v0.1", "Mixtral-8x22B",
                 "Yi-6B", "Yi-34B",
                 "Llama-2-7B", "Llama-2-70B", "OLMO"],
                value="Llama-3-8B",
                label="Base LLM name",
            )
        with gr.Column():
            together_api_key = gr.Textbox(
                label="🔑 Together APIKey",
                placeholder="Enter your Together API Key. Leave it blank if you want to use the default API key.",
                type="password",
                elem_id="api_key",
            )
            with gr.Column():
                with gr.Row():
                    max_tokens = gr.Textbox(value=256, label="Max tokens")
                    temperature = gr.Textbox(value=0.5, label="Temperature")
                    top_p = gr.Textbox(value=0.9, label="Top-p")
                    rp = gr.Textbox(value=1.1, label="Repetition penalty")
    chat = gr.ChatInterface(
        respond,
        additional_inputs=[max_tokens, temperature, top_p, rp, model_name, together_api_key],
    )
    chat.chatbot.height = 550

if __name__ == "__main__":
    demo.launch()