import asyncio
import json
import random

import aiolimiter
import openai
from anthropic import AsyncAnthropic
from openai import AsyncOpenAI, OpenAIError
from tqdm.asyncio import tqdm_asyncio

async def _throttled_openai_chat_completion_acreate(
    client: AsyncOpenAI,
    model: str,
    messages,
    temperature: float,
    max_tokens: int,
    top_p: float,
    limiter: aiolimiter.AsyncLimiter,
    json_format: bool = False,
    n: int = 1,
):
    """Call the OpenAI chat completions API under a shared rate limiter,
    retrying up to 10 times on rate-limit errors."""
    async with limiter:
        for _ in range(10):
            try:
                kwargs = dict(
                    model=model,
                    messages=messages,
                    temperature=temperature,
                    max_tokens=max_tokens,
                    top_p=top_p,
                    n=n,
                )
                if json_format:
                    kwargs["response_format"] = {"type": "json_object"}
                return await client.chat.completions.create(**kwargs)
            except openai.RateLimitError:
                print("Rate limit exceeded, retrying...")
                # Increase the retry wait time; asyncio.sleep keeps the event
                # loop free (the original used a blocking time.sleep here).
                await asyncio.sleep(random.randint(10, 20))
            except openai.BadRequestError as e:
                print(e)
                return None
            except OpenAIError as e:
                print(e)
                await asyncio.sleep(random.randint(5, 10))
        return None
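

# A minimal sketch of the throttling pattern used above (illustration only,
# not part of the original pipeline): AsyncLimiter(1, 60.0 / rpm) grants one
# slot per (60 / rpm) seconds, i.e. roughly `rpm` requests per minute when
# many coroutines share the limiter. The function name is hypothetical.
async def _limiter_demo(rpm: int = 120) -> None:
    limiter = aiolimiter.AsyncLimiter(1, 60.0 / rpm)
    for i in range(3):
        async with limiter:  # waits cooperatively until a slot is free
            print(f"slot {i} granted")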

async def generate_from_openai_chat_completion(
    client,
    messages,
    engine_name: str,
    temperature: float = 1.0,
    max_tokens: int = 512,
    top_p: float = 1.0,
    requests_per_minute: int = 100,
    json_format: bool = False,
    n: int = 1,
):
    """Fan out one throttled request per message list and gather the results.

    Reference: https://chat.openai.com/share/09154613-5f66-4c74-828b-7bd9384c2168
    """
    # One permit per `delay` seconds works out to `requests_per_minute`
    # permits per minute.
    delay = 60.0 / requests_per_minute
    limiter = aiolimiter.AsyncLimiter(1, delay)
    async_responses = [
        _throttled_openai_chat_completion_acreate(
            client,
            model=engine_name,
            messages=message,
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=top_p,
            limiter=limiter,
            json_format=json_format,
            n=n,
        )
        for message in messages
    ]
    responses = await tqdm_asyncio.gather(*async_responses)

    # Fallback values for failed requests; the dict mirrors the JSON schema
    # this project expects from the model.
    empty_dict = {
        "question": "",
        "options": {"A": "", "B": "", "C": "", "D": ""},
        "distractors": {"E": "", "F": "", "G": ""},
        "correct_answer": "",
    }
    empty_str = ""

    def _content(response, i=0):
        """Return the i-th choice's text, or None if the response is unusable."""
        if response and response.choices[i] and response.choices[i].message:
            return response.choices[i].message.content
        return None

    def _parse_json(text):
        """Parse model output as JSON, falling back to empty_dict on failure."""
        try:
            return json.loads(text)
        except (json.JSONDecodeError, TypeError):
            return empty_dict

    outputs = []
    for response in responses:
        if n == 1:
            content = _content(response)
            if json_format:
                outputs.append(_parse_json(content) if content else empty_dict)
            else:
                outputs.append(content if content else empty_str)
        else:
            texts = [_content(response, i) for i in range(n)]
            if json_format:
                outputs.append([_parse_json(t) if t else empty_dict for t in texts])
            else:
                outputs.append([t if t else empty_str for t in texts])
    return outputs
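

# Hedged usage sketch for the OpenAI path (illustration only): assumes
# OPENAI_API_KEY is set in the environment and that the model name below is
# one your account can access (both are assumptions, not part of the original).
async def _example_openai() -> None:
    client = AsyncOpenAI()  # picks up OPENAI_API_KEY from the environment
    prompts = [
        [{"role": "user", "content": "Say hello in one word."}],
        [{"role": "user", "content": "Name a prime number."}],
    ]
    outputs = await generate_from_openai_chat_completion(
        client,
        prompts,
        engine_name="gpt-4o-mini",  # hypothetical model choice
        max_tokens=16,
    )
    print(outputs)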

async def _throttled_claude_chat_completion_acreate(
    client: AsyncAnthropic,
    model: str,
    messages,
    temperature: float,
    max_tokens: int,
    top_p: float,
    limiter: aiolimiter.AsyncLimiter,
):
    """Call the Anthropic messages API under a shared rate limiter."""
    async with limiter:
        try:
            return await client.messages.create(
                model=model,
                messages=messages,
                temperature=temperature,
                max_tokens=max_tokens,
                top_p=top_p,
            )
        except Exception as e:
            # The original swallowed all errors with a bare `except:`; at
            # least surface the failure before returning None.
            print(e)
            return None

async def generate_from_claude_chat_completion(
    client,
    messages,
    engine_name: str,
    temperature: float = 1.0,
    max_tokens: int = 512,
    top_p: float = 1.0,
    requests_per_minute: int = 100,
    n: int = 1,
):
    """Fan out one throttled Claude request per message list and gather results.

    The Anthropic messages API has no `n` parameter, so each message is
    submitted `n` times and the flat response list is regrouped afterwards.

    Reference: https://chat.openai.com/share/09154613-5f66-4c74-828b-7bd9384c2168
    """
    delay = 60.0 / requests_per_minute
    limiter = aiolimiter.AsyncLimiter(1, delay)
    # Duplicate each message n times to emulate the OpenAI `n` parameter.
    n_messages = []
    for message in messages:
        for _ in range(n):
            n_messages.append(message)
    async_responses = [
        _throttled_claude_chat_completion_acreate(
            client,
            model=engine_name,
            messages=message,
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=top_p,
            limiter=limiter,
        )
        for message in n_messages
    ]
    responses = await tqdm_asyncio.gather(*async_responses)
    outputs = []
    if n == 1:
        for response in responses:
            if response and response.content and response.content[0].text:
                outputs.append(response.content[0].text)
            else:
                outputs.append("")
    else:
        # Regroup the flat response list into chunks of n per original message.
        for idx, response in enumerate(responses):
            if idx % n == 0:
                outputs.append([])
            if response and response.content and response.content[0].text:
                outputs[-1].append(response.content[0].text)
            else:
                outputs[-1].append("")
    return outputs
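

# Hedged usage sketch for the Claude path (illustration only): assumes
# ANTHROPIC_API_KEY is set and uses a placeholder model name.
async def _example_claude() -> None:
    client = AsyncAnthropic()  # picks up ANTHROPIC_API_KEY from the environment
    prompts = [[{"role": "user", "content": "Say hello in one word."}]]
    outputs = await generate_from_claude_chat_completion(
        client,
        prompts,
        engine_name="claude-3-haiku-20240307",  # placeholder model name
        max_tokens=16,
    )
    print(outputs)


if __name__ == "__main__":
    # Uncomment whichever example matches your credentials.
    # asyncio.run(_example_openai())
    # asyncio.run(_example_claude())
    pass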