from openai import AuthenticationError, BadRequestError, RateLimitError, OpenAIError
import os
import sys
import traceback
import subprocess

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import litellm
from litellm import (
    embedding,
    completion,
    # AuthenticationError,
    ContextWindowExceededError,
    # RateLimitError,
    # ServiceUnavailableError,
    # OpenAIError,
)
from concurrent.futures import ThreadPoolExecutor
import pytest

litellm.vertex_project = "pathrise-convert-1606954137718"
litellm.vertex_location = "us-central1"
litellm.num_retries = 0
# litellm.failure_callback = ["sentry"]

#### What this tests ####
# This tests exception mapping -> trigger an exception from an llm provider -> assert that the raised exception is of the expected type
# 5 providers -> OpenAI, Azure, Anthropic, Cohere, Replicate
# 3 main types of exceptions -> Rate Limit Errors, Context Window Errors, Auth errors (incorrect/rotated key, etc.)
# Approach: Run each model through the test -> assert that the correct exception type (always the same one) is raised

models = ["command-nightly"]


# Test 1: Context Window Errors
@pytest.mark.parametrize("model", models)
def test_context_window(model):
    print("Testing context window error")
    sample_text = "Say error 50 times" * 1000000
    messages = [{"content": sample_text, "role": "user"}]
    try:
        litellm.set_verbose = True
        response = completion(model=model, messages=messages)
        print(f"response: {response}")
        print("FAILED!")
        pytest.fail("The call succeeded, but a ContextWindowExceededError was expected")
    except ContextWindowExceededError:
        print("Worked!")
    except RateLimitError:
        print("RateLimited!")
    except Exception as e:
        print(f"{e}")
        pytest.fail(f"An error occurred - {e}")


@pytest.mark.parametrize("model", models)
def test_context_window_with_fallbacks(model):
    ctx_window_fallback_dict = {
        "command-nightly": "claude-2",
        "gpt-3.5-turbo-instruct": "gpt-3.5-turbo-16k",
        "azure/chatgpt-v-2": "gpt-3.5-turbo-16k",
    }
    sample_text = "how does a court case get to the Supreme Court?" * 1000
    messages = [{"content": sample_text, "role": "user"}]

    completion(
        model=model,
        messages=messages,
        context_window_fallback_dict=ctx_window_fallback_dict,
    )


# for model in litellm.models_by_provider["bedrock"]:
#     test_context_window(model=model)
# test_context_window(model="chat-bison")
# test_context_window_with_fallbacks(model="command-nightly")
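

# Illustrative sketch (not collected by pytest): one way to check that the fallback in
# context_window_fallback_dict was actually used. It assumes the response's `model`
# attribute reflects the model that ultimately served the request; the model names and
# the oversized prompt are placeholder assumptions.
def _example_context_window_fallback_check():
    ctx_window_fallback_dict = {"command-nightly": "claude-2"}
    messages = [{"role": "user", "content": "say error " * 100000}]  # deliberately too long
    response = completion(
        model="command-nightly",
        messages=messages,
        context_window_fallback_dict=ctx_window_fallback_dict,
    )
    print(f"served by: {response.model}")  # expected to name the fallback model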


# Test 2: InvalidAuth Errors
def invalid_auth(model):  # set the model's key to an invalid key, depending on the model
    messages = [{"content": "Hello, how are you?", "role": "user"}]
    temporary_key = None
    temporary_aws_access_key = None
    temporary_aws_region_name = None
    temporary_secret_key = None
    try:
        if model == "gpt-3.5-turbo" or model == "gpt-3.5-turbo-instruct":
            temporary_key = os.environ["OPENAI_API_KEY"]
            os.environ["OPENAI_API_KEY"] = "bad-key"
        elif "bedrock" in model:
            temporary_aws_access_key = os.environ["AWS_ACCESS_KEY_ID"]
            os.environ["AWS_ACCESS_KEY_ID"] = "bad-key"
            temporary_aws_region_name = os.environ["AWS_REGION_NAME"]
            os.environ["AWS_REGION_NAME"] = "bad-key"
            temporary_secret_key = os.environ["AWS_SECRET_ACCESS_KEY"]
            os.environ["AWS_SECRET_ACCESS_KEY"] = "bad-key"
        elif model == "azure/chatgpt-v-2":
            temporary_key = os.environ["AZURE_API_KEY"]
            os.environ["AZURE_API_KEY"] = "bad-key"
        elif model == "claude-instant-1":
            temporary_key = os.environ["ANTHROPIC_API_KEY"]
            os.environ["ANTHROPIC_API_KEY"] = "bad-key"
        elif model == "command-nightly":
            temporary_key = os.environ["COHERE_API_KEY"]
            os.environ["COHERE_API_KEY"] = "bad-key"
        elif "j2" in model:
            temporary_key = os.environ["AI21_API_KEY"]
            os.environ["AI21_API_KEY"] = "bad-key"
        elif "togethercomputer" in model:
            temporary_key = os.environ["TOGETHERAI_API_KEY"]
            os.environ[
                "TOGETHERAI_API_KEY"
            ] = "84060c79880fc49df126d3e87b53f8a463ff6e1c6d27fe64207cde25cdfcd1f24a"
        elif model in litellm.openrouter_models:
            temporary_key = os.environ["OPENROUTER_API_KEY"]
            os.environ["OPENROUTER_API_KEY"] = "bad-key"
        elif model in litellm.aleph_alpha_models:
            temporary_key = os.environ["ALEPH_ALPHA_API_KEY"]
            os.environ["ALEPH_ALPHA_API_KEY"] = "bad-key"
        elif model in litellm.nlp_cloud_models:
            temporary_key = os.environ["NLP_CLOUD_API_KEY"]
            os.environ["NLP_CLOUD_API_KEY"] = "bad-key"
        elif (
            model
            == "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1"
        ):
            temporary_key = os.environ["REPLICATE_API_KEY"]
            os.environ["REPLICATE_API_KEY"] = "bad-key"
        print(f"model: {model}")
        response = completion(model=model, messages=messages)
        print(f"response: {response}")
    except AuthenticationError as e:
        print(f"AuthenticationError Caught Exception - {str(e)}")
    except (
        OpenAIError
    ) as e:  # it is at least an OpenAI-compatible error -> covers random model errors, e.g. an overloaded server
        print(f"OpenAIError Caught Exception - {e}")
    except Exception as e:
        print(type(e))
        print(type(AuthenticationError))
        print(e.__class__.__name__)
        print(f"Uncaught Exception - {e}")
        pytest.fail(f"Error occurred: {e}")

    if temporary_key is not None:  # reset the key
        if model == "gpt-3.5-turbo" or model == "gpt-3.5-turbo-instruct":
            os.environ["OPENAI_API_KEY"] = temporary_key
        elif model == "azure/chatgpt-v-2":
            os.environ["AZURE_API_KEY"] = temporary_key
        elif model == "claude-instant-1":
            os.environ["ANTHROPIC_API_KEY"] = temporary_key
        elif model == "command-nightly":
            os.environ["COHERE_API_KEY"] = temporary_key
        elif (
            model
            == "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1"
        ):
            os.environ["REPLICATE_API_KEY"] = temporary_key
        elif "j2" in model:
            os.environ["AI21_API_KEY"] = temporary_key
        elif "togethercomputer" in model:
            os.environ["TOGETHERAI_API_KEY"] = temporary_key
        elif model in litellm.openrouter_models:
            os.environ["OPENROUTER_API_KEY"] = temporary_key
        elif model in litellm.aleph_alpha_models:
            os.environ["ALEPH_ALPHA_API_KEY"] = temporary_key
        elif model in litellm.nlp_cloud_models:
            os.environ["NLP_CLOUD_API_KEY"] = temporary_key
    elif "bedrock" in model and temporary_aws_access_key is not None:
        # bedrock swaps three AWS variables instead of a single key, so restore them separately
        os.environ["AWS_ACCESS_KEY_ID"] = temporary_aws_access_key
        os.environ["AWS_REGION_NAME"] = temporary_aws_region_name
        os.environ["AWS_SECRET_ACCESS_KEY"] = temporary_secret_key
    return
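

# Illustrative sketch (not collected by pytest): the manual save/restore pattern above
# could be wrapped in a context manager so keys are restored even if the test body fails
# partway through. The variable name passed in is whatever key the provider under test reads.
from contextlib import contextmanager


@contextmanager
def _temporarily_bad_env_var(name, bad_value="bad-key"):
    original = os.environ.get(name)
    os.environ[name] = bad_value
    try:
        yield
    finally:
        if original is None:
            os.environ.pop(name, None)
        else:
            os.environ[name] = original


# usage sketch:
# with _temporarily_bad_env_var("COHERE_API_KEY"):
#     invalid_auth(model="command-nightly")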


# for model in litellm.models_by_provider["bedrock"]:
#     invalid_auth(model=model)
# invalid_auth(model="command-nightly")


# Test 3: Invalid Request Error
@pytest.mark.parametrize("model", models)
def test_invalid_request_error(model):
    messages = [{"content": "hey, how's it going?", "role": "user"}]

    with pytest.raises(BadRequestError):
        completion(model=model, messages=messages, max_tokens="hello world")


def test_completion_azure_exception():
    try:
        import openai

        print("azure gpt-3.5 test\n\n")
        litellm.set_verbose = True
        ## Test azure call
        old_azure_key = os.environ["AZURE_API_KEY"]
        os.environ["AZURE_API_KEY"] = "good morning"
        response = completion(
            model="azure/chatgpt-v-2",
            messages=[{"role": "user", "content": "hello"}],
        )
        os.environ["AZURE_API_KEY"] = old_azure_key
        print(f"response: {response}")
        print(response)
    except openai.AuthenticationError as e:
        os.environ["AZURE_API_KEY"] = old_azure_key
        print("good job got the correct error for azure when key not set")
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_azure_exception()


async def asynctest_completion_azure_exception():
    try:
        import openai
        import litellm

        print("azure gpt-3.5 test\n\n")
        litellm.set_verbose = True
        ## Test azure call
        old_azure_key = os.environ["AZURE_API_KEY"]
        os.environ["AZURE_API_KEY"] = "good morning"
        response = await litellm.acompletion(
            model="azure/chatgpt-v-2",
            messages=[{"role": "user", "content": "hello"}],
        )
        print(f"response: {response}")
        print(response)
    except openai.AuthenticationError as e:
        os.environ["AZURE_API_KEY"] = old_azure_key
        print("good job got the correct error for azure when key not set")
        print(e)
    except Exception as e:
        print("Got wrong exception")
        print("exception", e)
        pytest.fail(f"Error occurred: {e}")


# import asyncio
# asyncio.run(
#     asynctest_completion_azure_exception()
# )
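

# Illustrative sketch (left commented out like the invocation above): the async helper
# could instead be driven by pytest-asyncio, assuming that plugin is installed. The model
# name and the invalid "good morning" key mirror the helper above.
# @pytest.mark.asyncio
# async def test_acompletion_azure_auth_error():
#     old_azure_key = os.environ["AZURE_API_KEY"]
#     os.environ["AZURE_API_KEY"] = "good morning"
#     try:
#         with pytest.raises(AuthenticationError):
#             await litellm.acompletion(
#                 model="azure/chatgpt-v-2",
#                 messages=[{"role": "user", "content": "hello"}],
#             )
#     finally:
#         os.environ["AZURE_API_KEY"] = old_azure_key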


def asynctest_completion_openai_exception_bad_model():
    try:
        import openai
        import litellm, asyncio

        print("openai exception bad model\n\n")
        litellm.set_verbose = True

        ## Test openai call
        async def test():
            response = await litellm.acompletion(
                model="openai/gpt-6",
                messages=[{"role": "user", "content": "hello"}],
            )

        asyncio.run(test())
    except openai.NotFoundError:
        print("Good job this is a NotFoundError for a model that does not exist!")
        print("Passed")
    except Exception as e:
        print("Raised wrong type of exception", type(e))
        pytest.fail(f"Error occurred: {e}")


# asynctest_completion_openai_exception_bad_model()


def asynctest_completion_azure_exception_bad_model():
    try:
        import openai
        import litellm, asyncio

        print("azure exception bad model\n\n")
        litellm.set_verbose = True

        ## Test azure call
        async def test():
            response = await litellm.acompletion(
                model="azure/gpt-12",
                messages=[{"role": "user", "content": "hello"}],
            )

        asyncio.run(test())
    except openai.NotFoundError:
        print("Good job this is a NotFoundError for a model that does not exist!")
        print("Passed")
    except Exception as e:
        print("Raised wrong type of exception", type(e))
        pytest.fail(f"Error occurred: {e}")


# asynctest_completion_azure_exception_bad_model()
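

# Illustrative sketch (not collected by pytest): the same bad-model check written
# synchronously with pytest.raises. "openai/gpt-6" is assumed, as above, to be a model
# name the provider does not recognize.
def _example_bad_model_not_found():
    import openai

    with pytest.raises(openai.NotFoundError):
        completion(
            model="openai/gpt-6",
            messages=[{"role": "user", "content": "hello"}],
        )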


def test_completion_openai_exception():
    # test if openai gpt models raise openai.AuthenticationError
    try:
        import openai

        print("openai gpt-3.5 test\n\n")
        litellm.set_verbose = True
        ## Test openai call
        old_openai_key = os.environ["OPENAI_API_KEY"]
        os.environ["OPENAI_API_KEY"] = "good morning"
        response = completion(
            model="gpt-4",
            messages=[{"role": "user", "content": "hello"}],
        )
        print(f"response: {response}")
        print(response)
    except openai.AuthenticationError as e:
        os.environ["OPENAI_API_KEY"] = old_openai_key
        print("OpenAI: good job got the correct error for openai when key not set")
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_openai_exception()


def test_completion_mistral_exception():
    # test if mistral/mistral-tiny raises openai.AuthenticationError
    try:
        import openai

        print("Testing mistral ai exception mapping")
        litellm.set_verbose = True
        ## Test mistral call
        old_mistral_key = os.environ["MISTRAL_API_KEY"]
        os.environ["MISTRAL_API_KEY"] = "good morning"
        response = completion(
            model="mistral/mistral-tiny",
            messages=[{"role": "user", "content": "hello"}],
        )
        print(f"response: {response}")
        print(response)
    except openai.AuthenticationError as e:
        os.environ["MISTRAL_API_KEY"] = old_mistral_key
        print("good job got the correct error for mistral when key not set")
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_completion_mistral_exception()


def test_content_policy_exception_image_generation_openai():
    try:
        # this is only a test - we needed some way to invoke the exception :(
        litellm.set_verbose = True
        response = litellm.image_generation(
            prompt="where do i buy lethal drugs from", model="dall-e-3"
        )
        print(f"response: {response}")
        assert len(response.data) > 0
    except litellm.ContentPolicyViolationError as e:
        print("caught a content policy violation error! Passed")
        pass
    except Exception as e:
        pytest.fail(f"An exception occurred - {str(e)}")


# test_content_policy_exception_image_generation_openai()


# # test_invalid_request_error(model="command-nightly")
# # Test 3: Rate Limit Errors
# def test_model_call(model):
#     try:
#         sample_text = "how does a court case get to the Supreme Court?"
#         messages = [{"content": sample_text, "role": "user"}]
#         print(f"model: {model}")
#         response = completion(model=model, messages=messages)
#     except RateLimitError as e:
#         print(f"headers: {e.response.headers}")
#         return True
#     # except OpenAIError:  # is at least an openai error -> in case of random model errors - e.g. overloaded server
#     #     return True
#     except Exception as e:
#         print(f"Uncaught Exception {model}: {type(e).__name__} - {e}")
#         traceback.print_exc()
#         pass
#     return False


# # Repeat each model 250 times
# # extended_models = [model for model in models for _ in range(250)]
# extended_models = ["azure/chatgpt-v-2" for _ in range(250)]


# def worker(model):
#     return test_model_call(model)


# # Create a dictionary to store the results
# counts = {True: 0, False: 0}

# # Use Thread Pool Executor
# with ThreadPoolExecutor(max_workers=500) as executor:
#     # Use map to start the operation in thread pool
#     results = executor.map(worker, extended_models)
#     # Iterate over results and count True/False
#     for result in results:
#         counts[result] += 1

# accuracy_score = counts[True] / (counts[True] + counts[False])
# print(f"accuracy_score: {accuracy_score}")