"""Helpers for sending raw completion requests to an OpenAI-compatible endpoint.

The module targets the endpoint configured in ``BASE_URL`` and provides:

* ``model_name_mapping`` -- translate a short model alias into the
  fully-qualified model identifier.
* ``urial_template`` -- assemble a prompt out of a prefix, previous
  query/answer turns and the new query.
* ``openai_base_request`` -- issue a streaming completion request.
"""

import logging
import os
from typing import List, Optional

from openai import OpenAI

BASE_URL = "https://api.together.xyz/v1"
# Read once at import time; used when a caller does not pass ``api_key``.
DEFAULT_API_KEY = os.getenv("TOGETHER_API_KEY")

# Short alias -> fully-qualified model identifier.
_MODEL_NAME_MAP = {
    "Llama-3-8B": "meta-llama/Llama-3-8b-hf",
    "Llama-3-70B": "meta-llama/Llama-3-70b-hf",
    "Llama-2-7B": "meta-llama/Llama-2-7b-hf",
    "Llama-2-70B": "meta-llama/Llama-2-70b-hf",
    "Mistral-7B-v0.1": "mistralai/Mistral-7B-v0.1",
    "Mixtral-8x22B": "mistralai/Mixtral-8x22B",
    "Qwen1.5-72B": "Qwen/Qwen1.5-72B",
    "Yi-34B": "zero-one-ai/Yi-34B",
    "Yi-6B": "zero-one-ai/Yi-6B",
    "OLMO": "allenai/OLMo-7B",
}


def model_name_mapping(model_name):
    """Return the fully-qualified model identifier for a short alias.

    Args:
        model_name: One of the aliases listed in ``_MODEL_NAME_MAP``
            (for example ``"Llama-3-8B"``). Matching is case-sensitive.

    Returns:
        The corresponding identifier, e.g. ``"meta-llama/Llama-3-8b-hf"``.

    Raises:
        ValueError: If ``model_name`` is not a known alias.
    """
    try:
        return _MODEL_NAME_MAP[model_name]
    except (KeyError, TypeError):
        # TypeError covers unhashable arguments, which the former if/elif
        # chain also reported as an invalid name.
        raise ValueError(f"Invalid model name: {model_name!r}") from None


def urial_template(urial_prompt, history, message):
    """Build the full prompt from a prefix, past turns and the new query.

    The result is ``urial_prompt`` followed by a newline, then one
    "# Query: / # Answer:" section per ``history`` entry (each text wrapped
    in triple double quotes), and finally the new query with an answer
    section that is left open for the model to complete.

    Args:
        urial_prompt: Text placed at the very start of the prompt.
        history: Iterable of ``(user_msg, ai_msg)`` pairs, oldest first.
        message: The new user query.

    Returns:
        The assembled prompt string.
    """
    parts = [urial_prompt + "\n"]
    parts.extend(
        f'# Query:\n"""\n{user_msg}\n"""\n\n# Answer:\n"""\n{ai_msg}\n"""\n\n'
        for user_msg, ai_msg in history
    )
    # The trailing answer block is intentionally left unterminated.
    parts.append(f'# Query:\n"""\n{message}\n"""\n\n# Answer:\n"""\n')
    return "".join(parts)


def openai_base_request(
    model: Optional[str] = None,
    temperature: float = 0,
    max_tokens: int = 512,
    top_p: float = 1.0,
    prompt: Optional[str] = None,
    n: int = 1,
    repetition_penalty: float = 1.0,
    stop: Optional[List[str]] = None,
    api_key: Optional[str] = None,
):
    """Send a streaming completion request to the endpoint at ``BASE_URL``.

    Args:
        model: Model identifier forwarded to the API.
        temperature: Sampling temperature; converted with ``float()``.
        max_tokens: Maximum number of tokens; converted with ``int()``.
        top_p: Nucleus-sampling parameter; converted with ``float()``.
        prompt: Prompt text to complete.
        n: Number of completions requested.
        repetition_penalty: Converted with ``float()`` and sent through
            ``extra_body`` because it is not a named argument of
            ``completions.create``.
        stop: Optional list of stop sequences.
        api_key: API key for the request. Falls back to ``DEFAULT_API_KEY``
            (the ``TOGETHER_API_KEY`` environment variable at import time).

    Returns:
        The object returned by ``client.completions.create`` with
        ``stream=True``.
    """
    if api_key is None:
        api_key = DEFAULT_API_KEY
    # NOTE(review): if DEFAULT_API_KEY is also None the OpenAI client applies
    # its own key lookup/validation -- confirm that is the intended fallback.
    client = OpenAI(api_key=api_key, base_url=BASE_URL)

    # Lazy %-style arguments: formatting only happens if the record is emitted.
    logging.info("Requesting chat completion from OpenAI API with model %s", model)
    logging.info("Prompt: %s", prompt)
    logging.info("Temperature: %s", temperature)
    logging.info("Max tokens: %s", max_tokens)
    logging.info("Top-p: %s", top_p)
    logging.info("Repetition penalty: %s", repetition_penalty)
    logging.info("Stop: %s", stop)

    return client.completions.create(
        model=model,
        prompt=prompt,
        temperature=float(temperature),
        max_tokens=int(max_tokens),
        top_p=float(top_p),
        n=n,
        extra_body={"repetition_penalty": float(repetition_penalty)},
        stop=stop,
        stream=True,
    )