import asyncio
import base64
import json
import logging
import os
import random
from time import sleep
from typing import Any, Dict

import aiolimiter
import numpy as np
import openai
from aiohttp import ClientSession
from anthropic import AsyncAnthropic
from openai import AsyncOpenAI, OpenAIError
from tqdm.asyncio import tqdm_asyncio


async def _throttled_openai_chat_completion_acreate(
    client: AsyncOpenAI,
    model: str,
    messages,
    temperature: float,
    max_tokens: int,
    top_p: float,
    limiter: aiolimiter.AsyncLimiter,
    json_format: bool = False,
    n: int = 1,
):
    """Send one chat-completion request through ``limiter``, retrying on errors.

    Up to 10 attempts are made. The limiter is entered once, before the first
    attempt; retries happen inside the same ``async with`` block.

    Args:
        client: Async OpenAI client used to issue the request.
        model: Model name forwarded to the API.
        messages: Chat messages for a single conversation.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Completion token limit forwarded to the API.
        top_p: Nucleus-sampling parameter forwarded to the API.
        limiter: Rate limiter acquired before the request is sent.
        json_format: When true, ``response_format={"type": "json_object"}``
            is added to the request.
        n: Number of choices requested.

    Returns:
        The API response object, or ``None`` if the request was rejected as a
        bad request or every attempt failed.
    """
    request_kwargs = {
        "model": model,
        "messages": messages,
        "temperature": temperature,
        "max_tokens": max_tokens,
        "top_p": top_p,
        "n": n,
    }
    if json_format:
        request_kwargs["response_format"] = {"type": "json_object"}

    async with limiter:
        for _ in range(10):
            try:
                return await client.chat.completions.create(**request_kwargs)
            except openai.RateLimitError:
                print("Rate limit exceeded, retrying...")
                # Longer back-off for rate limits. Must be asyncio.sleep: a
                # blocking time.sleep here would freeze the event loop and
                # stall every other in-flight request.
                await asyncio.sleep(random.randint(10, 20))
            except openai.BadRequestError as e:
                # Retrying an invalid request cannot succeed; give up now.
                print(e)
                return None
            except OpenAIError as e:
                print(e)
                await asyncio.sleep(random.randint(5, 10))
        return None


def _empty_question() -> Dict[str, Any]:
    """Return a fresh placeholder record for a missing JSON answer."""
    return {
        "question": "",
        "options": {
            "A": "",
            "B": "",
            "C": "",
            "D": "",
        },
        "distractors": {
            "E": "",
            "F": "",
            "G": "",
        },
        "correct_answer": "",
    }


def _openai_choice_content(response, index: int):
    """Return the message content of choice ``index``, or ``None`` if absent."""
    choices = getattr(response, "choices", None) if response else None
    if not choices or index >= len(choices):
        return None
    message = getattr(choices[index], "message", None)
    return getattr(message, "content", None) or None


def _parse_openai_choice(response, index: int, json_format: bool):
    """Convert one choice into its output value, substituting a placeholder."""
    content = _openai_choice_content(response, index)
    if not json_format:
        return content if content else ""
    if not content:
        return _empty_question()
    try:
        return json.loads(content)
    except json.JSONDecodeError as e:
        # One malformed completion must not discard the whole finished batch.
        print(e)
        return _empty_question()


async def generate_from_openai_chat_completion(
    client,
    messages,
    engine_name: str,
    temperature: float = 1.0,
    max_tokens: int = 512,
    top_p: float = 1.0,
    requests_per_minute: int = 100,
    json_format: bool = False,
    n: int = 1,
):
    """Run a batch of chat conversations against the OpenAI API concurrently.

    Args:
        client: Async OpenAI client passed to each request.
        messages: Iterable of conversations; one request is issued per item.
        engine_name: Model name forwarded to the API.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Completion token limit forwarded to the API.
        top_p: Nucleus-sampling parameter forwarded to the API.
        requests_per_minute: Used to derive the limiter period
            (``60 / requests_per_minute`` seconds per request).
        json_format: When true, each completion is parsed with ``json.loads``.
        n: Number of choices requested per conversation.

    Returns:
        One entry per conversation, in input order. With ``n == 1`` each entry
        is the completion text (or parsed JSON when ``json_format`` is set);
        otherwise each entry is a list of ``n`` such values. Missing, failed
        or unparseable completions become ``""`` (text mode) or a fresh
        empty question record (JSON mode).
    """
    # https://chat.openai.com/share/09154613-5f66-4c74-828b-7bd9384c2168
    delay = 60.0 / requests_per_minute
    limiter = aiolimiter.AsyncLimiter(1, delay)
    pending = [
        _throttled_openai_chat_completion_acreate(
            client,
            model=engine_name,
            messages=message,
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=top_p,
            limiter=limiter,
            json_format=json_format,
            n=n,
        )
        for message in messages
    ]
    responses = await tqdm_asyncio.gather(*pending)

    if n == 1:
        return [
            _parse_openai_choice(response, 0, json_format)
            for response in responses
        ]
    return [
        [_parse_openai_choice(response, i, json_format) for i in range(n)]
        for response in responses
    ]


async def _throttled_claude_chat_completion_acreate(
    client: AsyncAnthropic,
    model: str,
    messages,
    temperature: float,
    max_tokens: int,
    top_p: float,
    limiter: aiolimiter.AsyncLimiter,
):
    """Send one Anthropic messages request through ``limiter``.

    Args:
        client: Async Anthropic client used to issue the request.
        model: Model name forwarded to the API.
        messages: Chat messages for a single conversation.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Completion token limit forwarded to the API.
        top_p: Nucleus-sampling parameter forwarded to the API.
        limiter: Rate limiter acquired before the request is sent.

    Returns:
        The API response object, or ``None`` if the request raised.
    """
    async with limiter:
        try:
            return await client.messages.create(
                model=model,
                messages=messages,
                temperature=temperature,
                max_tokens=max_tokens,
                top_p=top_p,
            )
        except Exception as e:
            # Best-effort: report and yield None. ``Exception`` (not a bare
            # except) lets task cancellation and KeyboardInterrupt propagate.
            print(e)
            return None


def _claude_text(response) -> str:
    """Return the text of the first content block, or ``""`` if unavailable."""
    content = getattr(response, "content", None) if response else None
    if not content:
        return ""
    # The first block may lack a ``text`` attribute; treat that as empty.
    return getattr(content[0], "text", None) or ""


async def generate_from_claude_chat_completion(
    client,
    messages,
    engine_name: str,
    temperature: float = 1.0,
    max_tokens: int = 512,
    top_p: float = 1.0,
    requests_per_minute: int = 100,
    n: int = 1,
):
    """Run a batch of chat conversations against the Anthropic API concurrently.

    Each conversation is submitted ``n`` times as separate requests, since the
    request issued here carries no ``n`` parameter.

    Args:
        client: Async Anthropic client passed to each request.
        messages: Iterable of conversations.
        engine_name: Model name forwarded to the API.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Completion token limit forwarded to the API.
        top_p: Nucleus-sampling parameter forwarded to the API.
        requests_per_minute: Used to derive the limiter period
            (``60 / requests_per_minute`` seconds per request).
        n: Number of samples to collect per conversation.

    Returns:
        With ``n == 1``, a list of completion texts in input order. Otherwise
        a list with one ``n``-element list of texts per conversation. Failed
        or empty responses are represented by ``""``.
    """
    # https://chat.openai.com/share/09154613-5f66-4c74-828b-7bd9384c2168
    delay = 60.0 / requests_per_minute
    limiter = aiolimiter.AsyncLimiter(1, delay)

    # Repeat each conversation n times, keeping the copies adjacent so the
    # flat response list can be regrouped by slicing.
    repeated = [message for message in messages for _ in range(n)]
    pending = [
        _throttled_claude_chat_completion_acreate(
            client,
            model=engine_name,
            messages=message,
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=top_p,
            limiter=limiter,
        )
        for message in repeated
    ]
    responses = await tqdm_asyncio.gather(*pending)

    texts = [_claude_text(response) for response in responses]
    if n == 1 or not texts:
        return texts
    return [texts[start:start + n] for start in range(0, len(texts), n)]