richardblythman committed
Commit
ada32cf
1 Parent(s): 463094c

Upload folder using huggingface_hub
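The commit message indicates the folder was pushed with the `huggingface_hub` client; below is a minimal sketch of how such an upload is typically produced. The repo id, repo type, and folder path are illustrative assumptions, not values taken from this commit.

```python
from huggingface_hub import HfApi

api = HfApi()  # authenticates via a cached login token or the HF_TOKEN env var

# Hypothetical repo id and local path, shown only to illustrate the workflow.
api.upload_folder(
    folder_path=".",                      # local folder to mirror into the repo
    repo_id="your-username/your-repo",    # placeholder target repository
    repo_type="space",                    # assumption; could also be "model" or "dataset"
    commit_message="Upload folder using huggingface_hub",
)
```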

Files changed (39)
  1. .venv/lib/python3.12/site-packages/__pycache__/_virtualenv.cpython-312.pyc +0 -0
  2. .venv/lib/python3.12/site-packages/litellm-1.43.18.dist-info/INSTALLER +1 -0
  3. .venv/lib/python3.12/site-packages/litellm-1.43.18.dist-info/LICENSE +26 -0
  4. .venv/lib/python3.12/site-packages/litellm-1.43.18.dist-info/METADATA +395 -0
  5. .venv/lib/python3.12/site-packages/litellm-1.43.18.dist-info/RECORD +577 -0
  6. .venv/lib/python3.12/site-packages/litellm-1.43.18.dist-info/WHEEL +4 -0
  7. .venv/lib/python3.12/site-packages/litellm-1.43.18.dist-info/entry_points.txt +3 -0
  8. .venv/lib/python3.12/site-packages/litellm/cost_calculator.py +8 -3
  9. .venv/lib/python3.12/site-packages/litellm/integrations/prometheus.py +47 -5
  10. .venv/lib/python3.12/site-packages/litellm/litellm_core_utils/litellm_logging.py +3 -2
  11. .venv/lib/python3.12/site-packages/litellm/llms/anthropic.py +5 -3
  12. .venv/lib/python3.12/site-packages/litellm/llms/prompt_templates/factory.py +15 -3
  13. .venv/lib/python3.12/site-packages/litellm/llms/vertex_httpx.py +19 -2
  14. .venv/lib/python3.12/site-packages/litellm/model_prices_and_context_window_backup.json +18 -0
  15. .venv/lib/python3.12/site-packages/litellm/proxy/_new_secret_config.yaml +2 -4
  16. .venv/lib/python3.12/site-packages/litellm/proxy/_types.py +4 -0
  17. .venv/lib/python3.12/site-packages/litellm/proxy/auth/auth_checks.py +43 -6
  18. .venv/lib/python3.12/site-packages/litellm/proxy/auth/auth_utils.py +17 -0
  19. .venv/lib/python3.12/site-packages/litellm/proxy/common_utils/callback_utils.py +297 -0
  20. .venv/lib/python3.12/site-packages/litellm/proxy/guardrails/init_guardrails.py +1 -1
  21. .venv/lib/python3.12/site-packages/litellm/proxy/hooks/parallel_request_limiter.py +117 -0
  22. .venv/lib/python3.12/site-packages/litellm/proxy/management_endpoints/key_management_endpoints.py +35 -1
  23. .venv/lib/python3.12/site-packages/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py +125 -23
  24. .venv/lib/python3.12/site-packages/litellm/proxy/proxy_config.yaml +5 -3
  25. .venv/lib/python3.12/site-packages/litellm/proxy/proxy_server.py +35 -1
  26. .venv/lib/python3.12/site-packages/litellm/proxy/vertex_ai_endpoints/google_ai_studio_endpoints.py +138 -0
  27. .venv/lib/python3.12/site-packages/litellm/router.py +17 -0
  28. .venv/lib/python3.12/site-packages/litellm/tests/test_anthropic_completion.py +70 -1
  29. .venv/lib/python3.12/site-packages/litellm/tests/test_key_generate_prisma.py +165 -0
  30. .venv/lib/python3.12/site-packages/litellm/tests/test_least_busy_routing.py +12 -3
  31. .venv/lib/python3.12/site-packages/litellm/tests/test_parallel_request_limiter.py +270 -0
  32. .venv/lib/python3.12/site-packages/litellm/tests/test_pass_through_endpoints.py +4 -1
  33. .venv/lib/python3.12/site-packages/litellm/tests/test_proxy_server.py +49 -0
  34. .venv/lib/python3.12/site-packages/naptha_sdk-0.1.0.dist-info/RECORD +1 -1
  35. .venv/lib/python3.12/site-packages/naptha_sdk-0.1.0.dist-info/direct_url.json +1 -1
  36. .venv/lib/python3.12/site-packages/naptha_sdk/agent_service_engine.py +1 -1
  37. .venv/src/naptha-sdk/naptha_sdk/agent_service_engine.py +1 -1
  38. poetry.lock +5 -5
  39. pyproject.toml +1 -1
.venv/lib/python3.12/site-packages/__pycache__/_virtualenv.cpython-312.pyc CHANGED
Binary files a/.venv/lib/python3.12/site-packages/__pycache__/_virtualenv.cpython-312.pyc and b/.venv/lib/python3.12/site-packages/__pycache__/_virtualenv.cpython-312.pyc differ
 
.venv/lib/python3.12/site-packages/litellm-1.43.18.dist-info/INSTALLER ADDED
@@ -0,0 +1 @@
+ Poetry 1.8.3
.venv/lib/python3.12/site-packages/litellm-1.43.18.dist-info/LICENSE ADDED
@@ -0,0 +1,26 @@
+ Portions of this software are licensed as follows:
+
+ * All content that resides under the "enterprise/" directory of this repository, if that directory exists, is licensed under the license defined in "enterprise/LICENSE".
+ * Content outside of the above mentioned directories or restrictions above is available under the MIT license as defined below.
+ ---
+ MIT License
+
+ Copyright (c) 2023 Berri AI
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
.venv/lib/python3.12/site-packages/litellm-1.43.18.dist-info/METADATA ADDED
@@ -0,0 +1,395 @@
+ Metadata-Version: 2.1
+ Name: litellm
+ Version: 1.43.18
+ Summary: Library to easily interface with LLM API providers
+ License: MIT
+ Author: BerriAI
+ Requires-Python: >=3.8, !=2.7.*, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*, !=3.7.*
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.9
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Provides-Extra: extra-proxy
+ Provides-Extra: proxy
+ Requires-Dist: PyJWT (>=2.8.0,<3.0.0) ; extra == "proxy"
+ Requires-Dist: aiohttp
+ Requires-Dist: apscheduler (>=3.10.4,<4.0.0) ; extra == "proxy"
+ Requires-Dist: azure-identity (>=1.15.0,<2.0.0) ; extra == "extra-proxy"
+ Requires-Dist: azure-keyvault-secrets (>=4.8.0,<5.0.0) ; extra == "extra-proxy"
+ Requires-Dist: backoff ; extra == "proxy"
+ Requires-Dist: click
+ Requires-Dist: cryptography (>=42.0.5,<43.0.0) ; extra == "proxy"
+ Requires-Dist: fastapi (>=0.111.0,<0.112.0) ; extra == "proxy"
+ Requires-Dist: fastapi-sso (>=0.10.0,<0.11.0) ; extra == "proxy"
+ Requires-Dist: google-cloud-kms (>=2.21.3,<3.0.0) ; extra == "extra-proxy"
+ Requires-Dist: gunicorn (>=22.0.0,<23.0.0) ; extra == "proxy"
+ Requires-Dist: importlib-metadata (>=6.8.0)
+ Requires-Dist: jinja2 (>=3.1.2,<4.0.0)
+ Requires-Dist: jsonschema (>=4.22.0,<5.0.0)
+ Requires-Dist: openai (>=1.40.0)
+ Requires-Dist: orjson (>=3.9.7,<4.0.0) ; extra == "proxy"
+ Requires-Dist: prisma (==0.11.0) ; extra == "extra-proxy"
+ Requires-Dist: pydantic (>=2.0.0,<3.0.0)
+ Requires-Dist: pynacl (>=1.5.0,<2.0.0) ; extra == "extra-proxy"
+ Requires-Dist: python-dotenv (>=0.2.0)
+ Requires-Dist: python-multipart (>=0.0.9,<0.0.10) ; extra == "proxy"
+ Requires-Dist: pyyaml (>=6.0.1,<7.0.0) ; extra == "proxy"
+ Requires-Dist: requests (>=2.31.0,<3.0.0)
+ Requires-Dist: resend (>=0.8.0,<0.9.0) ; extra == "extra-proxy"
+ Requires-Dist: rq ; extra == "proxy"
+ Requires-Dist: tiktoken (>=0.7.0)
+ Requires-Dist: tokenizers
+ Requires-Dist: uvicorn (>=0.22.0,<0.23.0) ; extra == "proxy"
+ Project-URL: documentation, https://docs.litellm.ai
+ Project-URL: homepage, https://litellm.ai
+ Project-URL: repository, https://github.com/BerriAI/litellm
+ Description-Content-Type: text/markdown
+
+ <h1 align="center">
+ 🚅 LiteLLM
+ </h1>
+ <p align="center">
+ <p align="center">
+ <a href="https://render.com/deploy?repo=https://github.com/BerriAI/litellm" target="_blank" rel="nofollow"><img src="https://render.com/images/deploy-to-render-button.svg" alt="Deploy to Render"></a>
+ <a href="https://railway.app/template/HLP0Ub?referralCode=jch2ME">
+ <img src="https://railway.app/button.svg" alt="Deploy on Railway">
+ </a>
+ </p>
+ <p align="center">Call all LLM APIs using the OpenAI format [Bedrock, Huggingface, VertexAI, TogetherAI, Azure, OpenAI, Groq etc.]
+ <br>
+ </p>
+ <h4 align="center"><a href="https://docs.litellm.ai/docs/simple_proxy" target="_blank">LiteLLM Proxy Server (LLM Gateway)</a> | <a href="https://docs.litellm.ai/docs/hosted" target="_blank"> Hosted Proxy (Preview)</a> | <a href="https://docs.litellm.ai/docs/enterprise" target="_blank">Enterprise Tier</a></h4>
+ <h4 align="center">
+ <a href="https://pypi.org/project/litellm/" target="_blank">
+ <img src="https://img.shields.io/pypi/v/litellm.svg" alt="PyPI Version">
+ </a>
+ <a href="https://dl.circleci.com/status-badge/redirect/gh/BerriAI/litellm/tree/main" target="_blank">
+ <img src="https://dl.circleci.com/status-badge/img/gh/BerriAI/litellm/tree/main.svg?style=svg" alt="CircleCI">
+ </a>
+ <a href="https://www.ycombinator.com/companies/berriai">
+ <img src="https://img.shields.io/badge/Y%20Combinator-W23-orange?style=flat-square" alt="Y Combinator W23">
+ </a>
+ <a href="https://wa.link/huol9n">
+ <img src="https://img.shields.io/static/v1?label=Chat%20on&message=WhatsApp&color=success&logo=WhatsApp&style=flat-square" alt="Whatsapp">
+ </a>
+ <a href="https://discord.gg/wuPM9dRgDw">
+ <img src="https://img.shields.io/static/v1?label=Chat%20on&message=Discord&color=blue&logo=Discord&style=flat-square" alt="Discord">
+ </a>
+ </h4>
+
+ LiteLLM manages:
+
+ - Translate inputs to provider's `completion`, `embedding`, and `image_generation` endpoints
+ - [Consistent output](https://docs.litellm.ai/docs/completion/output), text responses will always be available at `['choices'][0]['message']['content']`
+ - Retry/fallback logic across multiple deployments (e.g. Azure/OpenAI) - [Router](https://docs.litellm.ai/docs/routing)
+ - Set Budgets & Rate limits per project, api key, model [LiteLLM Proxy Server (LLM Gateway)](https://docs.litellm.ai/docs/simple_proxy)
+
+ [**Jump to LiteLLM Proxy (LLM Gateway) Docs**](https://github.com/BerriAI/litellm?tab=readme-ov-file#openai-proxy---docs) <br>
+ [**Jump to Supported LLM Providers**](https://github.com/BerriAI/litellm?tab=readme-ov-file#supported-providers-docs)
+
+ 🚨 **Stable Release:** Use docker images with the `-stable` tag. These have undergone 12 hour load tests, before being published.
+
+ Support for more providers. Missing a provider or LLM Platform, raise a [feature request](https://github.com/BerriAI/litellm/issues/new?assignees=&labels=enhancement&projects=&template=feature_request.yml&title=%5BFeature%5D%3A+).
+
+ # Usage ([**Docs**](https://docs.litellm.ai/docs/))
+
+ > [!IMPORTANT]
+ > LiteLLM v1.0.0 now requires `openai>=1.0.0`. Migration guide [here](https://docs.litellm.ai/docs/migration)
+ > LiteLLM v1.40.14+ now requires `pydantic>=2.0.0`. No changes required.
+
+ <a target="_blank" href="https://colab.research.google.com/github/BerriAI/litellm/blob/main/cookbook/liteLLM_Getting_Started.ipynb">
+ <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
+ </a>
+
+ ```shell
+ pip install litellm
+ ```
+
+ ```python
+ from litellm import completion
+ import os
+
+ ## set ENV variables
+ os.environ["OPENAI_API_KEY"] = "your-openai-key"
+ os.environ["COHERE_API_KEY"] = "your-cohere-key"
+
+ messages = [{ "content": "Hello, how are you?","role": "user"}]
+
+ # openai call
+ response = completion(model="gpt-3.5-turbo", messages=messages)
+
+ # cohere call
+ response = completion(model="command-nightly", messages=messages)
+ print(response)
+ ```
+
+ Call any model supported by a provider, with `model=<provider_name>/<model_name>`. There might be provider-specific details here, so refer to [provider docs for more information](https://docs.litellm.ai/docs/providers)
+
+ ## Async ([Docs](https://docs.litellm.ai/docs/completion/stream#async-completion))
+
+ ```python
+ from litellm import acompletion
+ import asyncio
+
+ async def test_get_response():
+     user_message = "Hello, how are you?"
+     messages = [{"content": user_message, "role": "user"}]
+     response = await acompletion(model="gpt-3.5-turbo", messages=messages)
+     return response
+
+ response = asyncio.run(test_get_response())
+ print(response)
+ ```
+
+ ## Streaming ([Docs](https://docs.litellm.ai/docs/completion/stream))
+
+ liteLLM supports streaming the model response back, pass `stream=True` to get a streaming iterator in response.
+ Streaming is supported for all models (Bedrock, Huggingface, TogetherAI, Azure, OpenAI, etc.)
+
+ ```python
+ from litellm import completion
+ response = completion(model="gpt-3.5-turbo", messages=messages, stream=True)
+ for part in response:
+     print(part.choices[0].delta.content or "")
+
+ # claude 2
+ response = completion('claude-2', messages, stream=True)
+ for part in response:
+     print(part.choices[0].delta.content or "")
+ ```
+
+ ## Logging Observability ([Docs](https://docs.litellm.ai/docs/observability/callbacks))
+
+ LiteLLM exposes pre defined callbacks to send data to Lunary, Langfuse, DynamoDB, s3 Buckets, Helicone, Promptlayer, Traceloop, Athina, Slack
+
+ ```python
+ from litellm import completion
+
+ ## set env variables for logging tools
+ os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key"
+ os.environ["HELICONE_API_KEY"] = "your-helicone-auth-key"
+ os.environ["LANGFUSE_PUBLIC_KEY"] = ""
+ os.environ["LANGFUSE_SECRET_KEY"] = ""
+ os.environ["ATHINA_API_KEY"] = "your-athina-api-key"
+
+ os.environ["OPENAI_API_KEY"]
+
+ # set callbacks
+ litellm.success_callback = ["lunary", "langfuse", "athina", "helicone"] # log input/output to lunary, langfuse, supabase, athina, helicone etc
+
+ #openai call
+ response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
+ ```
+
+ # LiteLLM Proxy Server (LLM Gateway) - ([Docs](https://docs.litellm.ai/docs/simple_proxy))
+
+ Track spend + Load Balance across multiple projects
+
+ [Hosted Proxy (Preview)](https://docs.litellm.ai/docs/hosted)
+
+ The proxy provides:
+
+ 1. [Hooks for auth](https://docs.litellm.ai/docs/proxy/virtual_keys#custom-auth)
+ 2. [Hooks for logging](https://docs.litellm.ai/docs/proxy/logging#step-1---create-your-custom-litellm-callback-class)
+ 3. [Cost tracking](https://docs.litellm.ai/docs/proxy/virtual_keys#tracking-spend)
+ 4. [Rate Limiting](https://docs.litellm.ai/docs/proxy/users#set-rate-limits)
+
+ ## 📖 Proxy Endpoints - [Swagger Docs](https://litellm-api.up.railway.app/)
+
+
+ ## Quick Start Proxy - CLI
+
+ ```shell
+ pip install 'litellm[proxy]'
+ ```
+
+ ### Step 1: Start litellm proxy
+
+ ```shell
+ $ litellm --model huggingface/bigcode/starcoder
+
+ #INFO: Proxy running on http://0.0.0.0:4000
+ ```
+
+ ### Step 2: Make ChatCompletions Request to Proxy
+
+
+ > [!IMPORTANT]
+ > 💡 [Use LiteLLM Proxy with Langchain (Python, JS), OpenAI SDK (Python, JS) Anthropic SDK, Mistral SDK, LlamaIndex, Instructor, Curl](https://docs.litellm.ai/docs/proxy/user_keys)
+
+ ```python
+ import openai # openai v1.0.0+
+ client = openai.OpenAI(api_key="anything",base_url="http://0.0.0.0:4000") # set proxy to base_url
+ # request sent to model set on litellm proxy, `litellm --model`
+ response = client.chat.completions.create(model="gpt-3.5-turbo", messages = [
+     {
+         "role": "user",
+         "content": "this is a test request, write a short poem"
+     }
+ ])
+
+ print(response)
+ ```
+
+ ## Proxy Key Management ([Docs](https://docs.litellm.ai/docs/proxy/virtual_keys))
+
+ Connect the proxy with a Postgres DB to create proxy keys
+
+ ```bash
+ # Get the code
+ git clone https://github.com/BerriAI/litellm
+
+ # Go to folder
+ cd litellm
+
+ # Add the master key - you can change this after setup
+ echo 'LITELLM_MASTER_KEY="sk-1234"' > .env
+
+ # Add the litellm salt key - you cannot change this after adding a model
+ # It is used to encrypt / decrypt your LLM API Key credentials
+ # We recommned - https://1password.com/password-generator/
+ # password generator to get a random hash for litellm salt key
+ echo 'LITELLM_SALT_KEY="sk-1234"' > .env
+
+ source .env
+
+ # Start
+ docker-compose up
+ ```
+
+
+ UI on `/ui` on your proxy server
+ ![ui_3](https://github.com/BerriAI/litellm/assets/29436595/47c97d5e-b9be-4839-b28c-43d7f4f10033)
+
+ Set budgets and rate limits across multiple projects
+ `POST /key/generate`
+
+ ### Request
+
+ ```shell
+ curl 'http://0.0.0.0:4000/key/generate' \
+ --header 'Authorization: Bearer sk-1234' \
+ --header 'Content-Type: application/json' \
+ --data-raw '{"models": ["gpt-3.5-turbo", "gpt-4", "claude-2"], "duration": "20m","metadata": {"user": "ishaan@berri.ai", "team": "core-infra"}}'
+ ```
+
+ ### Expected Response
+
+ ```shell
+ {
+     "key": "sk-kdEXbIqZRwEeEiHwdg7sFA", # Bearer token
+     "expires": "2023-11-19T01:38:25.838000+00:00" # datetime object
+ }
+ ```
+
+ ## Supported Providers ([Docs](https://docs.litellm.ai/docs/providers))
+
+ | Provider | [Completion](https://docs.litellm.ai/docs/#basic-usage) | [Streaming](https://docs.litellm.ai/docs/completion/stream#streaming-responses) | [Async Completion](https://docs.litellm.ai/docs/completion/stream#async-completion) | [Async Streaming](https://docs.litellm.ai/docs/completion/stream#async-streaming) | [Async Embedding](https://docs.litellm.ai/docs/embedding/supported_embedding) | [Async Image Generation](https://docs.litellm.ai/docs/image_generation) |
+ |---|---|---|---|---|---|---|
+ | [openai](https://docs.litellm.ai/docs/providers/openai) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
+ | [azure](https://docs.litellm.ai/docs/providers/azure) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
+ | [aws - sagemaker](https://docs.litellm.ai/docs/providers/aws_sagemaker) | ✅ | ✅ | ✅ | ✅ | ✅ | |
+ | [aws - bedrock](https://docs.litellm.ai/docs/providers/bedrock) | ✅ | ✅ | ✅ | ✅ | ✅ | |
+ | [google - vertex_ai](https://docs.litellm.ai/docs/providers/vertex) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
+ | [google - palm](https://docs.litellm.ai/docs/providers/palm) | ✅ | ✅ | ✅ | ✅ | | |
+ | [google AI Studio - gemini](https://docs.litellm.ai/docs/providers/gemini) | ✅ | ✅ | ✅ | ✅ | | |
+ | [mistral ai api](https://docs.litellm.ai/docs/providers/mistral) | ✅ | ✅ | ✅ | ✅ | ✅ | |
+ | [cloudflare AI Workers](https://docs.litellm.ai/docs/providers/cloudflare_workers) | ✅ | ✅ | ✅ | ✅ | | |
+ | [cohere](https://docs.litellm.ai/docs/providers/cohere) | ✅ | ✅ | ✅ | ✅ | ✅ | |
+ | [anthropic](https://docs.litellm.ai/docs/providers/anthropic) | ✅ | ✅ | ✅ | ✅ | | |
+ | [empower](https://docs.litellm.ai/docs/providers/empower) | ✅ | ✅ | ✅ | ✅ |
+ | [huggingface](https://docs.litellm.ai/docs/providers/huggingface) | ✅ | ✅ | ✅ | ✅ | ✅ | |
+ | [replicate](https://docs.litellm.ai/docs/providers/replicate) | ✅ | ✅ | ✅ | ✅ | | |
+ | [together_ai](https://docs.litellm.ai/docs/providers/togetherai) | ✅ | ✅ | ✅ | ✅ | | |
+ | [openrouter](https://docs.litellm.ai/docs/providers/openrouter) | ✅ | ✅ | ✅ | ✅ | | |
+ | [ai21](https://docs.litellm.ai/docs/providers/ai21) | ✅ | ✅ | ✅ | ✅ | | |
+ | [baseten](https://docs.litellm.ai/docs/providers/baseten) | ✅ | ✅ | ✅ | ✅ | | |
+ | [vllm](https://docs.litellm.ai/docs/providers/vllm) | ✅ | ✅ | ✅ | ✅ | | |
+ | [nlp_cloud](https://docs.litellm.ai/docs/providers/nlp_cloud) | ✅ | ✅ | ✅ | ✅ | | |
+ | [aleph alpha](https://docs.litellm.ai/docs/providers/aleph_alpha) | ✅ | ✅ | ✅ | ✅ | | |
+ | [petals](https://docs.litellm.ai/docs/providers/petals) | ✅ | ✅ | ✅ | ✅ | | |
+ | [ollama](https://docs.litellm.ai/docs/providers/ollama) | ✅ | ✅ | ✅ | ✅ | ✅ | |
+ | [deepinfra](https://docs.litellm.ai/docs/providers/deepinfra) | ✅ | ✅ | ✅ | ✅ | | |
+ | [perplexity-ai](https://docs.litellm.ai/docs/providers/perplexity) | ✅ | ✅ | ✅ | ✅ | | |
+ | [Groq AI](https://docs.litellm.ai/docs/providers/groq) | ✅ | ✅ | ✅ | ✅ | | |
+ | [Deepseek](https://docs.litellm.ai/docs/providers/deepseek) | ✅ | ✅ | ✅ | ✅ | | |
+ | [anyscale](https://docs.litellm.ai/docs/providers/anyscale) | ✅ | ✅ | ✅ | ✅ | | |
+ | [IBM - watsonx.ai](https://docs.litellm.ai/docs/providers/watsonx) | ✅ | ✅ | ✅ | ✅ | ✅ | |
+ | [voyage ai](https://docs.litellm.ai/docs/providers/voyage) | | | | | ✅ | |
+ | [xinference [Xorbits Inference]](https://docs.litellm.ai/docs/providers/xinference) | | | | | ✅ | |
+ | [FriendliAI](https://docs.litellm.ai/docs/providers/friendliai) | ✅ | ✅ | ✅ | ✅ | | |
+
+ [**Read the Docs**](https://docs.litellm.ai/docs/)
+
+ ## Contributing
+
+ To contribute: Clone the repo locally -> Make a change -> Submit a PR with the change.
+
+ Here's how to modify the repo locally:
+ Step 1: Clone the repo
+
+ ```
+ git clone https://github.com/BerriAI/litellm.git
+ ```
+
+ Step 2: Navigate into the project, and install dependencies:
+
+ ```
+ cd litellm
+ poetry install -E extra_proxy -E proxy
+ ```
+
+ Step 3: Test your change:
+
+ ```
+ cd litellm/tests # pwd: Documents/litellm/litellm/tests
+ poetry run flake8
+ poetry run pytest .
+ ```
+
+ Step 4: Submit a PR with your changes! 🚀
+
+ - push your fork to your GitHub repo
+ - submit a PR from there
+
+ # Enterprise
+ For companies that need better security, user management and professional support
+
+ [Talk to founders](https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat)
+
+ This covers:
+ - ✅ **Features under the [LiteLLM Commercial License](https://docs.litellm.ai/docs/proxy/enterprise):**
+ - ✅ **Feature Prioritization**
+ - ✅ **Custom Integrations**
+ - ✅ **Professional Support - Dedicated discord + slack**
+ - ✅ **Custom SLAs**
+ - ✅ **Secure access with Single Sign-On**
+
+ # Support / talk with founders
+
+ - [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
+ - [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
+ - Our numbers 📞 +1 (770) 8783-106 / +1 (412) 618-6238
+ - Our emails ✉️ ishaan@berri.ai / krrish@berri.ai
+
+ # Why did we build this
+
+ - **Need for simplicity**: Our code started to get extremely complicated managing & translating calls between Azure, OpenAI and Cohere.
+
+ # Contributors
+
+ <!-- ALL-CONTRIBUTORS-LIST:START - Do not remove or modify this section -->
+ <!-- prettier-ignore-start -->
+ <!-- markdownlint-disable -->
+
+ <!-- markdownlint-restore -->
+ <!-- prettier-ignore-end -->
+
+ <!-- ALL-CONTRIBUTORS-LIST:END -->
+
+ <a href="https://github.com/BerriAI/litellm/graphs/contributors">
+ <img src="https://contrib.rocks/image?repo=BerriAI/litellm" />
+ </a>
+
.venv/lib/python3.12/site-packages/litellm-1.43.18.dist-info/RECORD ADDED
@@ -0,0 +1,577 @@
1
+ ../../../bin/litellm,sha256=C8U9cdkxUI1nR-SrwEkSOXFmeppVfM5f1V9oXW1nlx8,283
2
+ litellm/__init__.py,sha256=RgjXw3jbyF2m_WUVLIrcH0Uykz_pJC0FcdcYzy0l0n4,31281
3
+ litellm/_logging.py,sha256=cE1K1nljhW8yjV_-s-vJ0YQ8DJsNHvzWc1MFECIE3QY,2963
4
+ litellm/_redis.py,sha256=d9i9_Q1EJyQ3dRsgLhC18I792BiVxKFL9p0mqVv8aoU,5207
5
+ litellm/_service_logger.py,sha256=oxrLpjtssQN49A2QbwVdqBpaLoXtHfeQuEeVhaMolBI,6712
6
+ litellm/_version.py,sha256=ydZafSCEtu5V7o42UV_a8DTJyuLk-TgDZ5C5BUS6NTQ,101
7
+ litellm/adapters/anthropic_adapter.py,sha256=ySQ4Zsgat2lDZIR392QAyRFRciWeHVvz_4_fjS6HsPY,7356
8
+ litellm/assistants/main.py,sha256=-LRjZQz97ZYdlSMPAAq6s9IDLM5DfxbxpuzYOly03VA,47544
9
+ litellm/assistants/utils.py,sha256=3Ru97LHqMZDaibntq4Gczl9J2txLIUsK9lZBrNmEnmk,5729
10
+ litellm/batches/main.py,sha256=hq1kNNtubnPvJUv4ay4ZK-gYchf2VzXeGtqzMUmn7C8,15438
11
+ litellm/budget_manager.py,sha256=ed2TJr3-t0VFsEYehB7vFleJB5pnkfvvoRsYOkrc8K0,8304
12
+ litellm/caching.py,sha256=DQrrQyh_2Sizb932fZLlJAVoV_7rSzpC1du7gWBjtJo,91470
13
+ litellm/cost.json,sha256=GJEXQcWy9ZvA5DhsPlWnolw-0gK_JG6PQRC67EO6VmQ,108
14
+ litellm/cost_calculator.py,sha256=oA7kbg-_-Kg8ty88bRo7OxkH8L3Yn6qQfRr4TQvAq2g,33078
15
+ litellm/deprecated_litellm_server/.env.template,sha256=CO7AgKScAzceHKw711IG7q0_mlI_DIf-P8i3j9jdMbE,941
16
+ litellm/deprecated_litellm_server/Dockerfile,sha256=uduHN-pR8EqaWTpSgrU9gs1fFsMhIi1m_kCLovmtKtY,224
17
+ litellm/deprecated_litellm_server/README.md,sha256=TieXW_VcgnNoSt6juEbXn02I846Z7mJgDqxTThNsjYM,62
18
+ litellm/deprecated_litellm_server/__init__.py,sha256=U1MJ3xxMEDLZx1GTB6IFV0IsmIo0Ri0uAxXBIQPB2z0,52
19
+ litellm/deprecated_litellm_server/main.py,sha256=gjh5DHJ2n4UQVcd6bNjMcBHxlsOgl8ARvHnsigQXUjE,8350
20
+ litellm/deprecated_litellm_server/requirements.txt,sha256=ocISAeyHpH-UgvlL0557UXL6S1XJJ-bxh1P4uU2lTI8,70
21
+ litellm/deprecated_litellm_server/server_utils.py,sha256=vQze7bSC-11yd2GvQ_LatrKLXzrUamr7vRIuAM5PvrM,3221
22
+ litellm/exceptions.py,sha256=fyP2enX3vQo9w6P-R3wwrII1qe9h6gzv1oN8RiJAMjA,27887
23
+ litellm/files/main.py,sha256=sbpq7OjubHiSt2rBUO98UDiuYcdVAEqoGJ97VEwSds8,26036
24
+ litellm/fine_tuning/main.py,sha256=9hhRKJEytPaCRjiGJqKlM_IaNAA34q2LETzGwc8HTL4,22114
25
+ litellm/integrations/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
26
+ litellm/integrations/_types/open_inference.py,sha256=OKr0ZQIozlmyNhQh1fCv1ONrVAEUuDj-pZZv_JgK7Ao,7402
27
+ litellm/integrations/aispend.py,sha256=XfwmWvaDAgU7OCipI_nAYpJTOrNX8mZv3BrZMymRnSY,6423
28
+ litellm/integrations/arize_ai.py,sha256=knvcaUlnMCokm3OhNttM5mxjA6v4avZgj9cIV-t9VUQ,4034
29
+ litellm/integrations/athina.py,sha256=ozijkeM4qnfCIla8ZDvDgS0oejfz2vzj8noloZxypt8,3560
30
+ litellm/integrations/berrispend.py,sha256=FrL04R7bMLSre-dwt8GM9jA62Aqhyf2XJ7iiXok_5rQ,6577
31
+ litellm/integrations/braintrust_logging.py,sha256=xcjjzALBrxtE24w2w0umPOxg7wuJrRx6ej6Gow_DXdo,14128
32
+ litellm/integrations/clickhouse.py,sha256=fsJ33CcXnK700mpxTpUeEp5Yp_TIEgbPNE5t6tSAAIg,10305
33
+ litellm/integrations/custom_logger.py,sha256=VUi431sVQsKDOaPB1f7cDoHnuyH7uDjSCt4IqeqeSts,6875
34
+ litellm/integrations/datadog.py,sha256=Kk_D2JaP8UnpcUNk0fEv34Du6pbvqYBgOVUOpo5YNto,5400
35
+ litellm/integrations/dynamodb.py,sha256=HPh_L5n55hkNqnRp7z1PtaCYSL7Po7YNl_eRmg3_uNM,3229
36
+ litellm/integrations/email_alerting.py,sha256=aIP_Q03X60PSqJQ3rerWsOC4qS12Fh21GoSAGqF2nbc,4460
37
+ litellm/integrations/email_templates/templates.py,sha256=aLw_bBXNBImuTN5u7w6Z4_WKBWU_p1zKOOi48-nWhuY,2277
38
+ litellm/integrations/galileo.py,sha256=NhdpG1lyqHqbz_po_tPWKDfcRJfyEpUHILadIibhQwo,5613
39
+ litellm/integrations/gcs_bucket.py,sha256=d5xEZAO9g7M0G4oZZOpltZlk4CuLupry7eH_TqAad94,11043
40
+ litellm/integrations/greenscale.py,sha256=HBghButcuhRuP_cjl5Qxi9m2fitGfoDMIqh5W4RFyNA,2560
41
+ litellm/integrations/helicone.py,sha256=-My_KVQdg5XTewGqk_AwiP5akUNZ7UsekhoQI1otlEY,6894
42
+ litellm/integrations/lago.py,sha256=eob9SSnjehQSnEPqphAnUMm3PLtJUu3Z-La5Q3iCYlE,6407
43
+ litellm/integrations/langfuse.py,sha256=rVHyEY_md8hE8gGFXGqQgj5w9SdKssVSes8t_dSsBdU,28471
44
+ litellm/integrations/langsmith.py,sha256=25fd5HZvDhJtwrBJ3mXVsFDIfb9s6Oud1AILmGaxu6o,8279
45
+ litellm/integrations/litedebugger.py,sha256=VPfy6gIcZ1ahQIB9rJwniCO0zlWcLPTssg3mr8H9w0w,11088
46
+ litellm/integrations/logfire_logger.py,sha256=9-r9IvD8etmckjsKGmj5XpTY3dbUiewR6Gxhqb-_ftc,6155
47
+ litellm/integrations/lunary.py,sha256=Eyh9D0pPVGPIcDSCT9RoZ0rRGSwKmzy1L-3Ko4QAQnw,5229
48
+ litellm/integrations/openmeter.py,sha256=aX3SCNaoYiXd2oqKr6-00gzv7hS80mVKdluO64qmmSw,4534
49
+ litellm/integrations/opentelemetry.py,sha256=iEfd1tS95_sZAwQxgvm5vCLD4sElyzyqsgzhUUSVFZg,30286
50
+ litellm/integrations/prometheus.py,sha256=fVet5dwx4jqB4sE7Wgrp2jzdu-jxIejn5etfU7GStAo,23534
51
+ litellm/integrations/prometheus_helpers/prometheus_api.py,sha256=4Dc05zzdTPGHAIy1kV629Emr-gxr0g129x_IbmEd4tc,2527
52
+ litellm/integrations/prometheus_services.py,sha256=HiauxN2ZeEpGX0iiAWatxq24gAahQwqnuHr5KUeNnlI,7375
53
+ litellm/integrations/prompt_layer.py,sha256=RmCBDOOYXXx5X0jc-zIzpdDIcOGaMewgzr8cPu7ailo,3572
54
+ litellm/integrations/s3.py,sha256=EQlNn8dneZ66zl07laP1MU7E4lwIMtzzoZ4zHSPBjUU,6877
55
+ litellm/integrations/slack_alerting.py,sha256=rvYArR8JGX5_m-GV2Gl8lc4Ac4CLB-osnJRb0_u7TCk,68224
56
+ litellm/integrations/supabase.py,sha256=0hGcHrWR-rmIucLxUy2AM1DhKSGiibAqgID25O39nxo,4042
57
+ litellm/integrations/test_httpx.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
58
+ litellm/integrations/traceloop.py,sha256=Whgy-4so47PlyHT-ej6N5WYInLPvvccbBoWe1oOYTp8,5913
59
+ litellm/integrations/weights_biases.py,sha256=tZGuknLmre98aElgA9vqZ8dhTUzyMR3gEfn82vZILcA,7813
60
+ litellm/litellm_core_utils/core_helpers.py,sha256=JMWnc4mZ8FzyA5mYIOndVVK_phhpPc7EK16J3GZFbTk,3814
61
+ litellm/litellm_core_utils/exception_mapping_utils.py,sha256=mbETVEpgENAq8nVHcNyiXQWiBkKAfZuy1-dUU1zUuww,1535
62
+ litellm/litellm_core_utils/json_validation_rule.py,sha256=rtDKG_1vyTUsDp2BCUN6mj7jf_EDtOnQYxAfXcKTuz0,790
63
+ litellm/litellm_core_utils/litellm_logging.py,sha256=zk4pEaqBaxbWvoQmnRQ2g8oNYOAdggTHRPcJ_gnszYE,109711
64
+ litellm/litellm_core_utils/llm_cost_calc/google.py,sha256=Jbqg0Ur1gMNKMrGFlcw3rNx9gGdqnEmWmSrkTVRo-64,8276
65
+ litellm/litellm_core_utils/llm_cost_calc/utils.py,sha256=oE-V0I0SQjqwUn2mm42WnNCFP8kk_OS2MGQBgLrRbYs,2991
66
+ litellm/litellm_core_utils/llm_request_utils.py,sha256=DunH2Xg73qKcqkv0yh_TcxuNsZeP3CeL_Zzl1NK_0Yw,986
67
+ litellm/litellm_core_utils/logging_utils.py,sha256=BzKIdfYfFieznqyZ176lVcOdRhhaRTOv1tW4XKUrTmA,531
68
+ litellm/litellm_core_utils/redact_messages.py,sha256=rk7A2i6GVtTYvPg-GufOsVViz7zJ-T4PlyBi3wXvPgs,3768
69
+ litellm/litellm_core_utils/streaming_utils.py,sha256=1te4DqCt590w7yPE4qWev9FSCqRT9AxPu_NUsp84oWo,600
70
+ litellm/litellm_core_utils/token_counter.py,sha256=BF4xHo4ut6b5DBVd5r4bMfLyW_81_AvDT5n_0hKDQh4,3144
71
+ litellm/llms/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
72
+ litellm/llms/ai21.py,sha256=LBVSkxybKuR3B4iJEHrZE0YhtIykq3SDW5TRaSGToaA,7898
73
+ litellm/llms/aleph_alpha.py,sha256=QfkTSNzPZgzJtO72Q3j7baq_SKPEkApvU3u4skWkoDE,12754
74
+ litellm/llms/anthropic.py,sha256=ydcc0elNtEPRCm6QxqgZgLrh1cYp5uM3o4Y_dnTL3kw,52574
75
+ litellm/llms/anthropic_text.py,sha256=_a1uvk8rp1PumeSntgIrRS40oc0uPpZiKEPzE_IQuZI,11144
76
+ litellm/llms/azure.py,sha256=WRyjarQH1cK0lqDsIqbtIP8ljMC0O-Pyl-hrOi_j90A,96911
77
+ litellm/llms/azure_text.py,sha256=ke6yLeKEVtxvqNUuEOBzp06KqCocScCMSSyuqsyGNfE,20074
78
+ litellm/llms/base.py,sha256=iDcvsYjKW28jLdXHHI0izlJD0tQ7sJy8KKrye6916b8,2609
79
+ litellm/llms/base_aws_llm.py,sha256=yIvyVNXn3koJRsZ5VSD-yfunQ9hrDT4geM7QN3y4_BY,7874
80
+ litellm/llms/baseten.py,sha256=I43FimMq1qnPlHh1cbUIssVDN_h_i3sgG84jV2VP7Rc,6062
81
+ litellm/llms/bedrock.py,sha256=ngnu3qF1YtzHxCRHb5Eek1d2VqNK4w2ZRtcHdUZIJDw,56661
82
+ litellm/llms/bedrock_httpx.py,sha256=3OurzBkxF-Gqvd0w9grj36S9wfO4by5TIAFgOf0AEsU,79410
83
+ litellm/llms/clarifai.py,sha256=MYjU5UyNbMy5kpOFPkq0ql1izHXOadgztX8esrmUaHs,10716
84
+ litellm/llms/cloudflare.py,sha256=6p4TlxvxUNRwtMTAwUDlKDC0HHmaym3SMXLtQVVvr0s,5598
85
+ litellm/llms/cohere.py,sha256=C8ZxdOI5WdWgvDU0dhKtF5xT7Ca_cpZRq4S8-oUUHQo,13375
86
+ litellm/llms/cohere_chat.py,sha256=KeDi07XnQWgEHC6HHPfqvYG4LmGYcrhjLZP6FiksLFE,12093
87
+ litellm/llms/custom_httpx/http_handler.py,sha256=aclVKWDD1jd97qVcIwE7OYuoUBqJ5zZreDUdRdLpbpk,11252
88
+ litellm/llms/custom_httpx/httpx_handler.py,sha256=v8HYYvxAywJCZubWqYGvdiBF6fH6B8Y-XjDWZLv0fus,1249
89
+ litellm/llms/custom_llm.py,sha256=GHEInUvjyz5MOShpSnPa3RQMGcYUIAT0YLLXVrXfj5E,4264
90
+ litellm/llms/databricks.py,sha256=JwWopK46bwSUG8RjEPs28f0oV9dxqg6efokJCjSfvxI,25779
91
+ litellm/llms/files_apis/azure.py,sha256=fFAIkze3mxRES9VWuMTwS0eGGTIF77fSOrUc3UXMgRo,11098
92
+ litellm/llms/fine_tuning_apis/azure.py,sha256=6TRtrnDoQn8jp5B0dm9Lop5jnCFc0H3_8xLGT6xSn9E,6857
93
+ litellm/llms/fine_tuning_apis/openai.py,sha256=X_kZVX9nwyKv7FcjagStRfNSLCFdbw6M8ssoW78c-ZE,7226
94
+ litellm/llms/fine_tuning_apis/vertex_ai.py,sha256=y3s64IovOF-oRrelxtx88-ohI5xkUggQSi1r44LJDfw,11674
95
+ litellm/llms/fireworks_ai.py,sha256=UE0BPONbFez4T0RrN-kxW62BRlqd4_IZKex2zkuCrBg,3595
96
+ litellm/llms/gemini.py,sha256=lld1t-vN8LiB5ESpZWcS4sCYZgfFrsTSaEj4bzDU73k,15616
97
+ litellm/llms/huggingface_llms_metadata/hf_conversational_models.txt,sha256=-KennA-85KE2N-dTyR2TG4v30NvWc6IAE6zCIEngjZQ,76183
98
+ litellm/llms/huggingface_llms_metadata/hf_text_generation_models.txt,sha256=IskID-RI7HHQTqVB8cTAHoAOIVoeGm8vhFz5opYL8Kk,1288358
99
+ litellm/llms/huggingface_restapi.py,sha256=UJX4hkW2DrKeeVWFtebCa3EOUfjuhI7MFE1ACj3iYKI,44377
100
+ litellm/llms/maritalk.py,sha256=i3BGq_CJYB8z30dpb3PHt0gpeBCimBQhbNvg_HcT_pQ,5930
101
+ litellm/llms/nlp_cloud.py,sha256=AJSyYEv3JVAB6j9BPm61nkHx0LeZqHChU4maEZ1tjRs,8004
102
+ litellm/llms/nvidia_nim.py,sha256=pYVGYZeQF-VAB__bov-OIjG3C5Nok_ZZDnEXmi6Xt7Y,4619
103
+ litellm/llms/ollama.py,sha256=iHOz2ICaBWFUJLLHok5TXcg0EVK06rQTYT5pTBVD3cY,22684
104
+ litellm/llms/ollama_chat.py,sha256=_CjoCTnK4nvN7L0VU9xyU-y0CwFdvMFlP_Ny7F2WB4U,22396
105
+ litellm/llms/oobabooga.py,sha256=tygqCpcBiev_1ojYz-e84Gblzic3r2DJBszYMhkh99U,5474
106
+ litellm/llms/openai.py,sha256=ib3LSyUEwp6m7TWnn00aCBkniEPxfZUm9KXLPsfb8GM,126187
107
+ litellm/llms/openrouter.py,sha256=2KIepjHPqDXseisZ37ZjWQTgGDqDA8TDLBtb0hTGmqU,1026
108
+ litellm/llms/palm.py,sha256=jeuHIYmUvXnzmudYsxZrFhW03HoYNnhP4vWTPEIVGXo,7042
109
+ litellm/llms/petals.py,sha256=GPhhmIXqftEs3PLYg6k0IJvazTb9CzPyhBouTg6DXi0,7032
110
+ litellm/llms/predibase.py,sha256=_hNXV1RvEr9wpiNAFDbVW68Hq8y0wqw9_hcvHEWOxrw,22616
111
+ litellm/llms/prompt_templates/factory.py,sha256=dAGCJU0qOIm0EL5Nl-3CplK1IU1izuI_XhFAmYoAWj4,104166
112
+ litellm/llms/replicate.py,sha256=GWxD-VJcgWEo_iuavL-RiWIlUFoa_BL9dIenMCaLEj4,22090
113
+ litellm/llms/sagemaker.py,sha256=4DTZquhoUbOG72DOtpfpE1T6QZ1EbxCsRwr0eNtZTuQ,35170
114
+ litellm/llms/text_completion_codestral.py,sha256=itv5aHanYOi_OXH77gf-IS6VL5s3XWXPHogY_ZpILMQ,17906
115
+ litellm/llms/together_ai.py,sha256=GiTyexiXGQ7CFh5rRYl4dXjmFfqpFQWjJgCgHG5pMmY,9310
116
+ litellm/llms/tokenizers/9b5ad71b2ce5302211f9c61530b329a4922fc6a4,sha256=Ijkht27pm96ZW3_3OFE-7xAPtR0YyTWXoRO8_-hlsqc,1681126
117
+ litellm/llms/tokenizers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
118
+ litellm/llms/tokenizers/anthropic_tokenizer.json,sha256=wkFzffJLTn98mvT9zuKaDKkD3LKIqLdTvDRqMJKRF2c,1774213
119
+ litellm/llms/tokenizers/ec7223a39ce59f226a68acc30dc1af2788490e15,sha256=lLXKff9NAHZ7wlb90bJ-Wxc2HXuKX5aFR_nyPrcNIGk,836186
120
+ litellm/llms/tokenizers/fb374d419588a4632f3f557e76b4b70aebbca790,sha256=RGqVOMtsNI41FhINfAiwn1fDZJXirP_-WaW_iwz7Gi0,3613922
121
+ litellm/llms/triton.py,sha256=TXjx0dE6OhZozq0XifUwShaR540UkR4ASSDzAST1Xg4,11123
122
+ litellm/llms/vertex_ai.py,sha256=B-AJJ5a7LYu27jW_oUEIhrwVMZO7kYBlhMGkbqtTXlA,58900
123
+ litellm/llms/vertex_ai_anthropic.py,sha256=NHSVl9EItKtYxrCuZWcd-mwrXteXW1I_BbkWcfgbu9U,15364
124
+ litellm/llms/vertex_ai_partner.py,sha256=kCkcbcQV4ABKheTG35QxrJCiLHjji588f0BhtFUBlMo,8481
125
+ litellm/llms/vertex_httpx.py,sha256=yEoOuO0cXYG5RdGdld-nIp319_3t32z0ExItCkMpqDw,66479
126
+ litellm/llms/vllm.py,sha256=p_m0E4E_C5UKMjqfN1TiAt7oFsFoqS3yZgn3EaQSBbs,6122
127
+ litellm/llms/volcengine.py,sha256=f4CyIpbxff8trhkB6SEFNh-VmwwxkN_CD6Z32U3we9Y,2689
128
+ litellm/llms/watsonx.py,sha256=BqWLNjcKHhVZUZ1uF9KwZcT-zi74pE_E8z3QAe5o8NA,31464
129
+ litellm/main.py,sha256=2gZaGO9GOtTjWfnpvf1nQ-n4nF5lY03CWzpoXKiTzDg,205901
130
+ litellm/model_prices_and_context_window_backup.json,sha256=d56iJoU85W4Sa44iRXTW2X8flxjbX4-y0IpjJsybQrg,181982
131
+ litellm/proxy/.gitignore,sha256=v2ZocUpppVuVfYJh1Bd1JpjpYSLxifJdClMEo0oOdT0,17
132
+ litellm/proxy/README.md,sha256=MiZkO5ggaGLYVglWypp43xUgnWCL08XsqQAMSY8m5h8,787
133
+ litellm/proxy/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
134
+ litellm/proxy/_experimental/out/_next/static/chunks/131-cb6bfe24e23e121b.js,sha256=PuBjUDy1cjBTTKd60Ctu6FBVzkqDzrEtQYg7B7atiDA,681365
135
+ litellm/proxy/_experimental/out/_next/static/chunks/2f6dbc85-cac2949a76539886.js,sha256=L-FNXiIqsiBouoQ_haADmDIgNst-SPCs1iQhLFTNSKU,31064
136
+ litellm/proxy/_experimental/out/_next/static/chunks/3014691f-b24e8254c7593934.js,sha256=5vmdZfOJ4jrMbHo5TPKda4yuzYkai8ohJGTW4V2B2oI,725
137
+ litellm/proxy/_experimental/out/_next/static/chunks/505-5ff3c318fddfa35c.js,sha256=RlyUAsnfKQvWgPhkR2YKjvG6FiXNaMwIMdge7oC6UM0,327591
138
+ litellm/proxy/_experimental/out/_next/static/chunks/605-8e4b96f972af8eaf.js,sha256=P0WKRKtSxBAoWrYXjst0lQNpkP-145DaytbbGgrzeus,1015030
139
+ litellm/proxy/_experimental/out/_next/static/chunks/684-16b194c83a169f6d.js,sha256=cNjHfykyxpnQyi-NpQMUID_fTfjX7qozLPEa0LRtGSY,49660
140
+ litellm/proxy/_experimental/out/_next/static/chunks/69-8316d07d1f41e39f.js,sha256=lAalMsBcPUObqwncSYLplvSN_d0ercbP0lsldZBedlc,113335
141
+ litellm/proxy/_experimental/out/_next/static/chunks/777-50d836152fad178b.js,sha256=Xzm7fyD3wuRH1iPlPxn8qKtNaOmb8kRu_sQlfT4TdGc,28836
142
+ litellm/proxy/_experimental/out/_next/static/chunks/app/_not-found-4163791cb6a88df1.js,sha256=vRsMkO0OtbtzFDUnTLKV0chuVk6FfeQxtwfnvu8HyiQ,1775
143
+ litellm/proxy/_experimental/out/_next/static/chunks/app/layout-f84e1a2dc32a2b83.js,sha256=-f5KFlm1rJ9y4O780hHPtxWb-Ctj7ONFugdZyfU0d3w,420
144
+ litellm/proxy/_experimental/out/_next/static/chunks/app/model_hub/page-79eee78ed9fccf89.js,sha256=EN-k7S4w9EFNHNp6q-Bdn_-ZTzUH8tETMHWkcU1_DPA,7568
145
+ litellm/proxy/_experimental/out/_next/static/chunks/app/onboarding/page-8be9c2a4a5c886c5.js,sha256=YX_2ILOyKtNvizwJvoQjWrJi_90y_rPpfTzZd5OwH_U,2788
146
+ litellm/proxy/_experimental/out/_next/static/chunks/app/page-539019eb3e09177c.js,sha256=kxrgf_yf2YsngNq-oze8SRT3vD4r1gAeLPzgoXdmMlA,166748
147
+ litellm/proxy/_experimental/out/_next/static/chunks/fd9d1056-f593049e31b05aeb.js,sha256=QI0CKrpO8VBSIcDy-QBfPdQhTKoEXsIhfBXiQbnI16M,172192
148
+ litellm/proxy/_experimental/out/_next/static/chunks/framework-b370f160bb96059c.js,sha256=rdVv5xb7IDglnNf8MXneJb9zkELY_b3ITKCE5ThunrQ,141006
149
+ litellm/proxy/_experimental/out/_next/static/chunks/main-a61244f130fbf565.js,sha256=zlIcVUCT3XuOxqLsd5zTJ0JGu0Lwt0TLGBIG9mPbPqM,109705
150
+ litellm/proxy/_experimental/out/_next/static/chunks/main-app-9b4fb13a7db53edf.js,sha256=zl4knUp_PI9M9cnbWbJql9TmKbAJAc0Y_R6SPOzb_MI,470
151
+ litellm/proxy/_experimental/out/_next/static/chunks/pages/_app-d21e88acd55d90f1.js,sha256=6Tfqn-C_NCH5A9z56noXA-XhmIzoyHHa8hNefPRJVwE,284
152
+ litellm/proxy/_experimental/out/_next/static/chunks/pages/_error-d6107f1aac0c574c.js,sha256=eCsfmitISoEXoWoho7nSup51aVxMHxTUS5HZqTniPMg,249
153
+ litellm/proxy/_experimental/out/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js,sha256=AiXrA00CSgO9yQ6mx59WGTZi58Pu6QlpYpiCDlF8u4M,91460
154
+ litellm/proxy/_experimental/out/_next/static/chunks/webpack-193a7eac80c8baba.js,sha256=1Psq5CSz-83kmJjbVL11T7-RpQgTka0WR1yvRkBvMyU,3833
155
+ litellm/proxy/_experimental/out/_next/static/css/cd10067a0a3408b4.css,sha256=UMww9uOUjZbSIH7JFsZO5N9Ahf37hebjGfNnzmaCBqw,435002
156
+ litellm/proxy/_experimental/out/_next/static/fV1QC9ULIrhC5SUk7IqET/_buildManifest.js,sha256=1ZIrpmVMAUNAG9YfFRaxhQ_0tNDBUuM6anTEgxedIAU,224
157
+ litellm/proxy/_experimental/out/_next/static/fV1QC9ULIrhC5SUk7IqET/_ssgManifest.js,sha256=Z49s4suAsf5y_GfnQSvm4qtq2ggxEbZPfEDTXjy6XgA,80
158
+ litellm/proxy/_experimental/out/_next/static/media/26a46d62cd723877-s.woff2,sha256=lOXII-cuccwg9L-imwQ08iYAQJZdnQZsDny13Jn_1sM,18820
159
+ litellm/proxy/_experimental/out/_next/static/media/55c55f0601d81cf3-s.woff2,sha256=zhKUdvQpmyFbVbHu0vxDJLRr2yZB4R1Z4kgbdEpH0RQ,25908
160
+ litellm/proxy/_experimental/out/_next/static/media/581909926a08bbc8-s.woff2,sha256=6sXLry_RZ9cH4ezy5o4q8jK-nPyVJXQWRBfzKllMXvw,19072
161
+ litellm/proxy/_experimental/out/_next/static/media/6d93bde91c0c2823-s.woff2,sha256=MuUklqJWCJ8nnGFQGu-7Q4D3ksk_Aex6bnNWeP1is_E,74316
162
+ litellm/proxy/_experimental/out/_next/static/media/97e0cb1ae144a2a9-s.woff2,sha256=PSMwBhtNmlpSF4kzHs5Rp1X9plNe2ybXQJtxBSizzQ0,11220
163
+ litellm/proxy/_experimental/out/_next/static/media/a34f9d1faa5f3315-s.p.woff2,sha256=yI2yQBvvfhID4JM8xVJaD4GGO_0HZ1bbEqzqVZbwiew,48556
164
+ litellm/proxy/_experimental/out/_next/static/media/df0a9ae256c0569c-s.woff2,sha256=jbAP9Gxnsizai-2GWs9wd2UcrI0oQdW0CYBVa0iWGTE,10280
165
+ litellm/proxy/_experimental/out/favicon.ico,sha256=Ikbq6HOjEekHeOnx68AzqSl19Y5YPKwdQv7FyKK6Q8M,15406
166
+ litellm/proxy/_experimental/out/index.html,sha256=irBRWUnm0PrWadX8OEpsFwI1-G2sf__G_R9-qZas-VM,4970
167
+ litellm/proxy/_experimental/out/index.txt,sha256=Mg3w2XuTUbt99MQwxoErqmg176uK0d6IHKh6FZuW3IA,2814
168
+ litellm/proxy/_experimental/out/model_hub.txt,sha256=4q9pFlK2rrDn26rNEGprA70dAS5SKqF-xfq6ROAICJE,3121
169
+ litellm/proxy/_experimental/out/next.svg,sha256=VZld-tbstJRaHoVt3KA8XhaqW_E_0htN9qdK55NXvPw,1375
170
+ litellm/proxy/_experimental/out/onboarding.txt,sha256=oUHO_ARLk_YTrYB_r_GGNDxlXVtfcwytHrMwjqQ2dWI,3174
171
+ litellm/proxy/_experimental/out/vercel.svg,sha256=P6XNdXtBjhivxo3eutVfRDIG5BAyeSHdsr8b5zFliIA,629
172
+ litellm/proxy/_experimental/post_call_rules.py,sha256=0tMsQ8ViObIH2wJcEfdWt9CZ2FAkj6HoBIrAr59VvFc,170
173
+ litellm/proxy/_logging.py,sha256=3zwPYBRv2EL1OB8Tk7_O6qU6lXCL2zSBNshe7rfyZbU,1055
174
+ litellm/proxy/_new_secret_config.yaml,sha256=lpsdzQZiPUUd_hJRxQMgcRDNMJoCp9VBS4vDxoSyFD4,68
175
+ litellm/proxy/_super_secret_config.yaml,sha256=go-txuGiBfjn8vrxTYrB9Sto_BRWjnMiTrStJcSh5Xw,3480
176
+ litellm/proxy/_types.py,sha256=IFnuSZv92fBsHPUyl4p1DBmeppbyK-FH8bRGf_4H8ds,55813
177
+ litellm/proxy/admin_ui.py,sha256=x1z0jm_HxQbsOqvv0QE4SXe8HNo_atKTpmhcDnaA8zo,7562
178
+ litellm/proxy/analytics_endpoints/analytics_endpoints.py,sha256=qk2Onr8qR38dp7_JXPSb1xKsWRGgl9IvETrWIFQnTmQ,3480
179
+ litellm/proxy/auth/auth_checks.py,sha256=l6d2-NdiG4XNNgP4EoJQ-ofv9PCKr5brllLPd0lunKU,22437
180
+ litellm/proxy/auth/auth_utils.py,sha256=mqFAyC8PIOoCdYJhkYiLgGe6NazVVOsMFCy4fKzMMFs,7513
181
+ litellm/proxy/auth/handle_jwt.py,sha256=cO1BQ98C_aQvwzEduUmMtq8cztbJkRs0bwR4zTb7rQ4,9052
182
+ litellm/proxy/auth/litellm_license.py,sha256=tBwvC1giDYrcVBBFXZN_ofxozEcIfA3RerH3lLX0TZc,5064
183
+ litellm/proxy/auth/model_checks.py,sha256=GVnSDURnKs0hREyJtSFgBmxyjUJR-1fQdFMmtnqetEg,2397
184
+ litellm/proxy/auth/oauth2_check.py,sha256=vDqfzUnR_ybjdnVhczoz4dOzObqhvzXtzUL2xEh8EoU,2875
185
+ litellm/proxy/auth/public_key.pem,sha256=KlTCQCWViTHUwzzxCu9KyFCX8YTdnIfGJlx7jiotak4,451
186
+ litellm/proxy/auth/rds_iam_token.py,sha256=D7pBrNyjUPhYslBVuV7Wzfs8iotOnSR4d3lTybMveaQ,6198
187
+ litellm/proxy/auth/user_api_key_auth.py,sha256=_MTcomGi6dS53Krw9i5AnPjriQ0UkQdUiOtXkKFcCSQ,55138
188
+ litellm/proxy/cached_logo.jpg,sha256=KQhlR-OWdfaNr0Cz6lQYbMRVnU1I7o-19IKOO92EovM,15974
189
+ litellm/proxy/caching_routes.py,sha256=jWUJQoTX4TcnS7PjPP7H2RlS96xddkpKV-lmFnRK6Kc,6234
190
+ litellm/proxy/common_utils/admin_ui_utils.py,sha256=HMC3onZmlFuidqBBx1qoMXcsHie-JFoBEgLkDGPVJfQ,5908
191
+ litellm/proxy/common_utils/callback_utils.py,sha256=XZfd1Rc8Utqwcv56lFFRP1lPefPwBvLOSEyS6dZICFE,12526
192
+ litellm/proxy/common_utils/debug_utils.py,sha256=_0DIq80jHcX8hwrj-JQyoYP6RtbD4z2CwxOYEtvSNwg,7865
193
+ litellm/proxy/common_utils/encrypt_decrypt_utils.py,sha256=JG0Bq-P4RyCQxEGkryDG1u2U9OykDm4DseTVt8T_RAI,2815
194
+ litellm/proxy/common_utils/http_parsing_utils.py,sha256=PGpF474ULWwu41d28ckpJeyG_gTA-FqYgbyZssPKH1s,3415
195
+ litellm/proxy/common_utils/load_config_utils.py,sha256=OTB-yO5u7-5oVbRMQImYy1KVWn-HcNjRe78kj7Q6OdE,2100
196
+ litellm/proxy/common_utils/openai_endpoint_utils.py,sha256=cKo5fw41r09HDscxibDKUqpI7bUd1QQjd306NI1N_C0,721
197
+ litellm/proxy/config_management_endpoints/pass_through_endpoints.py,sha256=f7RT0Q-mioZ3E-Khzgpb5g47gd8nq6DpVfd6HqeqnR8,1057
198
+ litellm/proxy/custom_callbacks.py,sha256=cOVm_N7CZ7lJWJe0azBeeObGNmv55yddGxEsur0GqDQ,2673
199
+ litellm/proxy/custom_callbacks1.py,sha256=IJzB6S5HysU3kcpkhKuZ7lQTHFOVBrHvMs3XqIflwjo,1779
200
+ litellm/proxy/custom_handler.py,sha256=XCMLmhsbB1GL46Lih0V_1qHQF30KcCAQv6JTaI2enSs,675
201
+ litellm/proxy/db/base_client.py,sha256=JAg-ghx1qLNuxSRSn0B6Y_BB7a1ZIINNuvjOTJ_aByQ,1129
202
+ litellm/proxy/db/dynamo_db.py,sha256=_UxzvCgnmZt90iR7u9kF0rGE_v3Gzhr986QZ5QkBmFA,16392
203
+ litellm/proxy/example_config_yaml/_health_check_test_config.yaml,sha256=DcUpvUly3ASBh57fdv51uZ5Nr7a3o7f7j1sQebILtjQ,512
204
+ litellm/proxy/example_config_yaml/aliases_config.yaml,sha256=mN_iQHMZBv6CWXLF3BAOc-sdRrLKcFnWRbJIDXePXcA,1225
205
+ litellm/proxy/example_config_yaml/azure_config.yaml,sha256=swb4kZv8EN6IfTW8G_uOFqjzXtcMxUpbf7Lz7G_GHS8,747
206
+ litellm/proxy/example_config_yaml/custom_auth.py,sha256=4Gm2Jk3BtuGxSAargCdoR8qBQvVYJkseR0MomBJFXlk,1586
207
+ litellm/proxy/example_config_yaml/custom_callbacks.py,sha256=BHiYN-Jtpf42_d5mc_G34yRuKMsQCGNYCq0RfhP3x2c,2252
208
+ litellm/proxy/example_config_yaml/langfuse_config.yaml,sha256=jkBz0zM8bUEBb_gmHi5P0TuFyC0WYlyGa37-WVRdsAo,181
209
+ litellm/proxy/example_config_yaml/load_balancer.yaml,sha256=hz5tnS6TvE8P-qU3pZ-SspqMB280EtrSwMZvjEca3sg,886
210
+ litellm/proxy/example_config_yaml/opentelemetry_config.yaml,sha256=u7-6jPVmj2Yca7nTeu1ykDZzzdtGKcGj3v5Y557Fc00,192
211
+ litellm/proxy/example_config_yaml/otel_test_config.yaml,sha256=brqGCEvPSIMPBJTwF70M7zdbv1XXNb2PmdQFHmkv-fk,247
212
+ litellm/proxy/example_config_yaml/simple_config.yaml,sha256=OBODVvCc0814U8-YTmiwT7C4UkSjLN51Bd0HxDenTVg,88
213
+ litellm/proxy/fine_tuning_endpoints/endpoints.py,sha256=IM4JEwayis5UwIArgnNf06fzohyfafvbRGxGmJN-1W4,14199
214
+ litellm/proxy/guardrails/guardrail_helpers.py,sha256=24g8oO47L5Hd_vFz1Q7uycd6Yc4HaDdzSGGkesDvyZo,3975
215
+ litellm/proxy/guardrails/init_guardrails.py,sha256=Q7VdOyUe6WBNcK835IRkeVh5NChnB7RPSgn3N6VtDpI,2674
216
+ litellm/proxy/health_check.py,sha256=DFXFyT249ksTDYQUsSLdm0jzStVOoOBmMDcFs_IYfpU,3271
217
+ litellm/proxy/health_endpoints/_health_endpoints.py,sha256=cBFpZJcxeUaA9GMN3ByUkzDDP6mxzUxtcXFThUCVl2A,20440
218
+ litellm/proxy/hooks/__init__.py,sha256=Il5Q9ATdX8yXqVxtP_nYqUhExzxPC_qk_WXQ_4h0exg,16
219
+ litellm/proxy/hooks/azure_content_safety.py,sha256=NI82k0Lu8X8xHHNDiifMQFhl4bAcPZlCt93J6zOPQgc,5620
220
+ litellm/proxy/hooks/batch_redis_get.py,sha256=1NkleF6uwY5OkncUWhhceWqWZ1abjCJgBwtOdHWp1HY,5263
221
+ litellm/proxy/hooks/cache_control_check.py,sha256=_2oyTBCOsyS8pJKis8U4boIA2DqXc9d15rCWQpANG6k,2213
222
+ litellm/proxy/hooks/dynamic_rate_limiter.py,sha256=vIXBFsKQ7F76_7QZ1Rc7_Nsxas3kqYDQ-iDSKqO-BO8,11850
223
+ litellm/proxy/hooks/example_presidio_ad_hoc_recognizer.json,sha256=VZLbOsMKjmQRdigSjZ3Rn5PJiizWV0If4_kGq_gH9DE,756
224
+ litellm/proxy/hooks/max_budget_limiter.py,sha256=tUZGcX8P6jgpxbSxrEMUoRDH-pTqpQjwGO3cGphuIm0,1640
225
+ litellm/proxy/hooks/parallel_request_limiter.py,sha256=WbTNCB1yn49jUipUf8AOBhFr_uheZvTnwwuaLWTEd_k,27226
226
+ litellm/proxy/hooks/presidio_pii_masking.py,sha256=R8J2dZpRqx18AqOiWsalC4FH5RIiUQLPxdhVS2gmXaA,14810
227
+ litellm/proxy/hooks/prompt_injection_detection.py,sha256=IApaMoXa7lcCAm-u856cu6y0InjgXVkZDK9zWUdpkxk,10251
228
+ litellm/proxy/lambda.py,sha256=h_06oqJhK3tkvnKOmxe7VLtPuIJIsosJE07BFXzF7sQ,107
229
+ litellm/proxy/litellm_pre_call_utils.py,sha256=mvuCOsURF0pMJAVOc9vnw-2Zdcs-XVMLYERB2rjbsDs,13345
230
+ litellm/proxy/llamaguard_prompt.txt,sha256=tCel8OPpD7IybjAulUqEg4QhJBdXKGThiv6J4DoKJFk,3300
231
+ litellm/proxy/logo.jpg,sha256=ZnPgg_2nBqNuMuqW2ZSrWNISVaK6HiSuNB4e5xttQto,24694
232
+ litellm/proxy/management_endpoints/internal_user_endpoints.py,sha256=4EGdTFEBuGCaP2Ici7k84Nbv2IbRNc2IkOXmHQ3bhYQ,29723
233
+ litellm/proxy/management_endpoints/key_management_endpoints.py,sha256=0K0Ig2iTQNybl_-3NRaS5SuGXOQFnYDQXAsDsB4H_gg,39494
234
+ litellm/proxy/management_endpoints/team_callback_endpoints.py,sha256=-EXxZRkJ1zptyhh1OW0QD5mLmrSng1KF5hMP5BjOW2c,14318
235
+ litellm/proxy/management_endpoints/team_endpoints.py,sha256=-grVzx0Vp1_yNuwpcaaYhM4DEeDr5rwb8uDj5gMrCLo,32827
236
+ litellm/proxy/management_helpers/utils.py,sha256=NvnpYaPBopH_nym7gWhiCFLaaVLE_pQ-8j0P7hIjABI,12559
237
+ litellm/proxy/openai_files_endpoints/files_endpoints.py,sha256=MG_NJAa_CTgbP9HRzrH7r9FtHUDBH3awSo7roer17EQ,20800
238
+ litellm/proxy/openapi.json,sha256=MJrfO9l1MFZmvPnXC77LzUJojMwTkAiFU4whrntKA-4,7163
239
+ litellm/proxy/otel_config.yaml,sha256=Fnu-KSsikVFOve9ev6mjKNRMTisOCn-SjTCRoLe1dZ4,625
240
+ litellm/proxy/out/404.html,sha256=QqEaqZOsCgQ-bNIz0UkOKQkATFA0_ZSNw8N_dWRhSEo,6645
241
+ litellm/proxy/out/_next/static/bNZcj3BOVnvWu7auSxFn-/_buildManifest.js,sha256=1ZIrpmVMAUNAG9YfFRaxhQ_0tNDBUuM6anTEgxedIAU,224
242
+ litellm/proxy/out/_next/static/bNZcj3BOVnvWu7auSxFn-/_ssgManifest.js,sha256=Z49s4suAsf5y_GfnQSvm4qtq2ggxEbZPfEDTXjy6XgA,80
243
+ litellm/proxy/out/_next/static/chunks/131-cb6bfe24e23e121b.js,sha256=PuBjUDy1cjBTTKd60Ctu6FBVzkqDzrEtQYg7B7atiDA,681365
244
+ litellm/proxy/out/_next/static/chunks/2f6dbc85-cac2949a76539886.js,sha256=L-FNXiIqsiBouoQ_haADmDIgNst-SPCs1iQhLFTNSKU,31064
245
+ litellm/proxy/out/_next/static/chunks/3014691f-b24e8254c7593934.js,sha256=5vmdZfOJ4jrMbHo5TPKda4yuzYkai8ohJGTW4V2B2oI,725
246
+ litellm/proxy/out/_next/static/chunks/505-5ff3c318fddfa35c.js,sha256=RlyUAsnfKQvWgPhkR2YKjvG6FiXNaMwIMdge7oC6UM0,327591
247
+ litellm/proxy/out/_next/static/chunks/605-8e4b96f972af8eaf.js,sha256=P0WKRKtSxBAoWrYXjst0lQNpkP-145DaytbbGgrzeus,1015030
248
+ litellm/proxy/out/_next/static/chunks/684-16b194c83a169f6d.js,sha256=cNjHfykyxpnQyi-NpQMUID_fTfjX7qozLPEa0LRtGSY,49660
249
+ litellm/proxy/out/_next/static/chunks/69-8316d07d1f41e39f.js,sha256=lAalMsBcPUObqwncSYLplvSN_d0ercbP0lsldZBedlc,113335
250
+ litellm/proxy/out/_next/static/chunks/777-50d836152fad178b.js,sha256=Xzm7fyD3wuRH1iPlPxn8qKtNaOmb8kRu_sQlfT4TdGc,28836
251
+ litellm/proxy/out/_next/static/chunks/app/_not-found-4163791cb6a88df1.js,sha256=vRsMkO0OtbtzFDUnTLKV0chuVk6FfeQxtwfnvu8HyiQ,1775
252
+ litellm/proxy/out/_next/static/chunks/app/layout-e379310fa648921d.js,sha256=beGf6MhgLrjZ6TnrQM6xeDmS00RJGpoIF07wkU0v6dA,420
253
+ litellm/proxy/out/_next/static/chunks/app/model_hub/page-39740ed413adc0ec.js,sha256=jK3HuUKJiH_BdodqtJpJ6gA9-OjHX1xgcqUVSQQCO18,7568
254
+ litellm/proxy/out/_next/static/chunks/app/onboarding/page-cd0662400289603c.js,sha256=ZG7ANPSvcOZWIPOS0kiJXrB_mMYtMcbkCnvaUN7v0-E,2788
255
+ litellm/proxy/out/_next/static/chunks/app/page-5b7e51de07f1b89a.js,sha256=knKqEGuXueppVaBnD9EC4vivpRCA2w4_ETDKvfT6Oew,166748
256
+ litellm/proxy/out/_next/static/chunks/fd9d1056-f593049e31b05aeb.js,sha256=QI0CKrpO8VBSIcDy-QBfPdQhTKoEXsIhfBXiQbnI16M,172192
257
+ litellm/proxy/out/_next/static/chunks/framework-b370f160bb96059c.js,sha256=rdVv5xb7IDglnNf8MXneJb9zkELY_b3ITKCE5ThunrQ,141006
258
+ litellm/proxy/out/_next/static/chunks/main-a61244f130fbf565.js,sha256=zlIcVUCT3XuOxqLsd5zTJ0JGu0Lwt0TLGBIG9mPbPqM,109705
259
+ litellm/proxy/out/_next/static/chunks/main-app-096338c8e1915716.js,sha256=6rTapQL-ul47TUSRitJ-HOTm4mJHSX5T5si3T-gYj-8,470
260
+ litellm/proxy/out/_next/static/chunks/pages/_app-d21e88acd55d90f1.js,sha256=6Tfqn-C_NCH5A9z56noXA-XhmIzoyHHa8hNefPRJVwE,284
261
+ litellm/proxy/out/_next/static/chunks/pages/_error-d6107f1aac0c574c.js,sha256=eCsfmitISoEXoWoho7nSup51aVxMHxTUS5HZqTniPMg,249
262
+ litellm/proxy/out/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js,sha256=AiXrA00CSgO9yQ6mx59WGTZi58Pu6QlpYpiCDlF8u4M,91460
263
+ litellm/proxy/out/_next/static/chunks/webpack-2d07f7d2618f9dfc.js,sha256=CHjpYfsFltVNfHacPI3qkHBCjF953WspddbcjL4-G5c,3833
264
+ litellm/proxy/out/_next/static/css/051d7321f11572d4.css,sha256=ZAr7WQ7FH8lTC0yesXVuh_vMky3_6lwGITrPD74M3hk,435002
265
+ litellm/proxy/out/_next/static/media/05a31a2ca4975f99-s.woff2,sha256=aWbbPArZ7qC4X5FSGEnMlyRNgDekNM95lriseseglDc,10496
266
+ litellm/proxy/out/_next/static/media/513657b02c5c193f-s.woff2,sha256=KG1HaV7h2Ir6RPfhBeM8I_RblOuOc97RtgqRQ5ZR5uA,17612
267
+ litellm/proxy/out/_next/static/media/51ed15f9841b9f9d-s.woff2,sha256=biS-z0aPL0ZtGGo4W6reWB_jwASs7DXZipKkSSFgnHw,22524
268
+ litellm/proxy/out/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2,sha256=O88EyjAeRPE_QEyKBKpK5wf2epUOEu8wwjj5bnhCZqE,46552
269
+ litellm/proxy/out/_next/static/media/d6b16ce4a6175f26-s.woff2,sha256=Lo2OA4Fs4kgf_PLDbklFXlDfaFQg56qwljRJCa1pTY4,80044
270
+ litellm/proxy/out/_next/static/media/ec159349637c90ad-s.woff2,sha256=In0Dqjy59njqtSDIZhWICXhLuULHbySMImbKI87FhUA,27316
271
+ litellm/proxy/out/_next/static/media/fd4db3eb5472fc27-s.woff2,sha256=ZEFW4NV-s7WzXwBryUCTH_cG3DoOLQkL9bRwuf_ojRY,12768
272
+ litellm/proxy/out/favicon.ico,sha256=Ikbq6HOjEekHeOnx68AzqSl19Y5YPKwdQv7FyKK6Q8M,15406
273
+ litellm/proxy/out/index.html,sha256=nIBZl0GeNsWHxd6s6LuNu-V_3XC5tFj4vNeH7mkSt_s,4970
274
+ litellm/proxy/out/index.txt,sha256=HBcRQgyWV7LfgmB1SeCbZUu7gnverOd0KKGxo6OeHmQ,2814
275
+ litellm/proxy/out/model_hub.html,sha256=fH59PU_X4fOmPxxl3Kd-Qd0C-_06jnn7nh6CGunuFZM,5344
276
+ litellm/proxy/out/model_hub.txt,sha256=hsvGZaMdDhcckP22g6FZBor2cg4HfHrTk6un4beB0Ao,3121
277
+ litellm/proxy/out/next.svg,sha256=VZld-tbstJRaHoVt3KA8XhaqW_E_0htN9qdK55NXvPw,1375
278
+ litellm/proxy/out/onboarding.html,sha256=EpT9xGR3e9fzchtZeDcg5GXQEUJbXr_VAJdHKDgC2rs,5402
279
+ litellm/proxy/out/onboarding.txt,sha256=-4Vps5sF6CVeOqIZXS4Cqan0FdzMpOZp0VDb7Rug_Lg,3174
280
+ litellm/proxy/out/vercel.svg,sha256=P6XNdXtBjhivxo3eutVfRDIG5BAyeSHdsr8b5zFliIA,629
281
+ litellm/proxy/pass_through_endpoints/pass_through_endpoints.py,sha256=HIAuLXBIy-kYRj2_ror_HKHLaf0Q9CGyHDRDekzeb1Q,27074
282
+ litellm/proxy/post_call_rules.py,sha256=bbnqX3BXhKjvbRN6LdZIwndKMCh88i4a9BXkTzsaHVk,359
283
+ litellm/proxy/prisma_migration.py,sha256=_Rsx8d8J7zF4pdVXo5vqP0OsNzoGg7fXXyZKqDxLht0,2264
284
+ litellm/proxy/proxy_cli.py,sha256=bXMgaLCbCO88qR6icLfreszo7FBjk4BqXj_NVt6NQgQ,25901
285
+ litellm/proxy/proxy_config.yaml,sha256=yv8uOLwPzGviTvtYRawjOxQaQ8sqbHbgcNNJ6g3mN9E,1637
286
+ litellm/proxy/proxy_load_test/litellm_proxy_config.yaml,sha256=S5cDYau2btdo-VKyBaGqjOl9hPPolttvKx45M78e07c,158
287
+ litellm/proxy/proxy_load_test/litellm_router_proxy/Dockerfile,sha256=ojfWGjipZHfYWaaxUYV1Tygah8UHR_iC5dTloBvkqME,457
288
+ litellm/proxy/proxy_load_test/litellm_router_proxy/main.py,sha256=lc8tA_IYqIhOE1JVu7dtAKtnEYObmohjjsGksfoydFk,1567
289
+ litellm/proxy/proxy_load_test/locustfile.py,sha256=r5Y93HQeiuBlsiN73jwb3uNJOG2HfroBpZq9S-_TKc0,1082
290
+ litellm/proxy/proxy_load_test/openai_endpoint.py,sha256=d16VI9g_554QCovlRUm9EFRk7KfMrIVW5SZhXPHGbN4,1412
291
+ litellm/proxy/proxy_load_test/simple_litellm_proxy.py,sha256=v4ywNlkgZ2GQH6XHiwPTR-LbTuEqb0PBBXa0IFliDps,1301
292
+ litellm/proxy/proxy_load_test/simple_litellm_router_proxy.py,sha256=lc8tA_IYqIhOE1JVu7dtAKtnEYObmohjjsGksfoydFk,1567
293
+ litellm/proxy/proxy_load_test/simple_proxy.py,sha256=a9FuXHZq_10zIXkW7JtIEtO2ORo3dyoXcf-33zunE20,1267
294
+ litellm/proxy/proxy_server.py,sha256=MVJlTtCxwgHPG7tJ7bIWiRtBXqSxKCFe8yKq1QTiaiY,362456
295
+ litellm/proxy/queue/celery_app.py,sha256=biT-emisvGRb697qPMsAXyphfFpCUHlzRGesBeZ3Nv8,2612
296
+ litellm/proxy/queue/celery_worker.py,sha256=Sz5zAZXcU96WbkFOsGxFaaPKJtmz8F4JIsWA477eMwQ,324
297
+ litellm/proxy/queue/rq_worker.py,sha256=oOD_1LuoJw2pc-D8aHBg7kN0uCR-piXqBGZbfJSdbLo,957
298
+ litellm/proxy/route_llm_request.py,sha256=C_NDJs5Nst9PPr5s018jCF3isu9uGdLoRpAf9YehYb8,3387
299
+ litellm/proxy/schema.prisma,sha256=xoC6o4xE-A33Bb_45ztnnWE2kP3ED5gmda1DrmR1jsU,10345
300
+ litellm/proxy/secret_managers/aws_secret_manager.py,sha256=oS0pYj6Wpu0PkwlEifDlJ9Q7FsP3mDAZjoRAvVGZxvI,5306
301
+ litellm/proxy/secret_managers/google_kms.py,sha256=QbvkHgeZE3PC4OmVOvlqhT5HOuG1B-3WvcLFconQdk0,1284
302
+ litellm/proxy/spend_tracking/spend_management_endpoints.py,sha256=pxa8yzSbpgxfZymtje3k5ybTOjQ0Bv7Go8QQAQ4Qah4,67309
303
+ litellm/proxy/spend_tracking/spend_tracking_utils.py,sha256=zVWVpV6SZ5pQoh20Wv4zyl6J2U4LNZy_8z8MFisSExY,4376
304
+ litellm/proxy/start.sh,sha256=qFUFqvhcEIMyL3Bp9vtAtLvY0zjyLw6lHTocHqpLE5w,32
305
+ litellm/proxy/tests/bursty_load_test_completion.py,sha256=qdH9bbEO-semCr4rqZa5fz-NdO5G9IYx-okxmwUXec4,1506
306
+ litellm/proxy/tests/error_log.txt,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
307
+ litellm/proxy/tests/large_text.py,sha256=jNJ7qOSbKv7awjMWUvnyjL-x3fa8Uu6tRbR_VYViZtE,24566
308
+ litellm/proxy/tests/llama_index_data/essay.txt,sha256=8GEx5TIILYFGfoemBcNEITbObOO-f0cR7dyLC0wYDMg,75041
309
+ litellm/proxy/tests/load_test_completion.py,sha256=j23t4zg3Q2BFhSy563wqauadC7TRLjkC5NhE4KE9pw4,2103
310
+ litellm/proxy/tests/load_test_embedding.py,sha256=1_5D3sIOKu9l1Y41b-SB_al7-HTViwdyKStJVB9C7pU,2972
311
+ litellm/proxy/tests/load_test_embedding_100.py,sha256=qMiabdAchfp2PkbqxXyEohZmKDGxUu3ZobV1YSIr52M,1573
312
+ litellm/proxy/tests/load_test_embedding_proxy.py,sha256=u3PJSuaVwex1cY_7SrhePMHD1Eu2nOZBE9pCK1Pd8mY,3014
313
+ litellm/proxy/tests/load_test_q.py,sha256=lu5ima-DhIN5zy_VkuxK2CiexNUANI7j1bvJFsMWHEY,3939
314
+ litellm/proxy/tests/request_log.txt,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
315
+ litellm/proxy/tests/test_anthropic_context_caching.py,sha256=XdO18lk5YyHSmTMCQXefHsgYtLRsXt2YTIelifAlrpk,1039
316
+ litellm/proxy/tests/test_anthropic_sdk.py,sha256=JUSv3o3UuHXf_FYClerSsTigR-vbadtBiY6cIS7kxm0,481
317
+ litellm/proxy/tests/test_async.py,sha256=yZoDqz8JI9JDIo5dGAk1ipIDncy2-KL3jvSll0QF-vg,739
318
+ litellm/proxy/tests/test_gemini_context_caching.py,sha256=3fWPDtTqwmwDgdoBrlGlO2kSnhnP66a2svouP_w5fBE,1575
319
+ litellm/proxy/tests/test_langchain_request.py,sha256=tkOxaPitlISJt570nseQahU0MQLZX0RFZg7HaahmNos,1187
320
+ litellm/proxy/tests/test_llamaindex.py,sha256=uKSIB18t1fny0rr_JuagEg4zJnl-DBGTZT2uI14bF4w,1041
321
+ litellm/proxy/tests/test_mistral_sdk.py,sha256=_j6p3_4bPZh6o7yfwO5pG5cnfbWNuRALK2ycOjvsGtg,403
322
+ litellm/proxy/tests/test_openai_embedding.py,sha256=KiDpGjzOJFqr_uEKjDY0XyyQPYfL8PsMUynZH4lPnFg,2541
323
+ litellm/proxy/tests/test_openai_exception_request.py,sha256=Ka6yYycaDruKIw7ckBKSw2-Bqsp1XeTP8JeTfGTlyDk,1391
324
+ litellm/proxy/tests/test_openai_js.js,sha256=R5kHG96bQIWpJSNMRgtjr-vK0eUSURv-GBc6GeEpEOs,945
325
+ litellm/proxy/tests/test_openai_request.py,sha256=RaUPtc6341adnYyQo4b9KVW42dAbnAYlrgFn2MNBIBM,1763
326
+ litellm/proxy/tests/test_openai_request_with_traceparent.py,sha256=FrTdzhWyUBc8f4P15Z7OlWDI50LQD5C4waIerNJgEuM,1653
327
+ litellm/proxy/tests/test_openai_simple_embedding.py,sha256=41OITmgUD_RLb84O56ym55cyosZKU0mCAZMQbEfpmYA,289
328
+ litellm/proxy/tests/test_pass_through_langfuse.py,sha256=jfFlFyisJv36vX1UAkTEEiadQn_V4Of0H3Quhbbden8,338
329
+ litellm/proxy/tests/test_q.py,sha256=B3C8Z5vhnF19FmA682XtKqJcRuL1K9xFZvubbdElQfo,2514
330
+ litellm/proxy/tests/test_simple_traceparent_openai.py,sha256=8aW31Ui2Lpz3hf6pIP0E0odMoRdPOeYL8pglj84nI-g,595
331
+ litellm/proxy/ui_crud_endpoints/proxy_setting_endpoints.py,sha256=RIo26sszKZqhy2-h8OpT3r3eIiNahMtyTgQfQ54xw6o,3515
332
+ litellm/proxy/utils.py,sha256=LrVV2SMJrPlgX1vcy-4jVEV8HOb7HOQm-sS7004QCO4,115344
333
+ litellm/proxy/vertex_ai_endpoints/google_ai_studio_endpoints.py,sha256=lpqD5GyibzJCJnMDvcSkCOqLmU7Xgb234AuyIED5QVg,4149
334
+ litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py,sha256=3Go1gGVvk3B0sDRWxznTKlhS_HGuYKCgMQjcsIvbLWI,10460
335
+ litellm/py.typed,sha256=bKPUECNwNtN5PZk1JYgrtM4QFbfIniqsIVGDL0oKMuQ,129
336
+ litellm/router.py,sha256=H2t9Kqrtz5gDQsvSA2qTAlcDAqhcs8Sc7oItloqNDlw,211688
337
+ litellm/router_strategy/least_busy.py,sha256=wfupoyg5nvuWovelCE5aULs4XsOZ02sAfsQTRid66ww,8093
338
+ litellm/router_strategy/lowest_cost.py,sha256=ZEU4KpR4rXzR6n1EDuNirWxQ9NotvqbeK73wJT86n_0,12888
339
+ litellm/router_strategy/lowest_latency.py,sha256=cD-qnLTp2RK2zKGI6E0J837GHvuDEkNy0o5fPkc9omk,20210
340
+ litellm/router_strategy/lowest_tpm_rpm.py,sha256=_U3FESx3sg-YimJPG1zx3j2GPvmSoE-fnau48JJLFgg,9281
341
+ litellm/router_strategy/lowest_tpm_rpm_v2.py,sha256=Qz_U9oR7JAyOdeMorv615DG81frUOaQga4Ruog34CuQ,24904
342
+ litellm/router_strategy/tag_based_routing.py,sha256=nOMA6MWnp9HxdJamlpuJ3HPgdYS71SCC3YWHTNTJjls,2814
343
+ litellm/router_utils/client_initalization_utils.py,sha256=kIDmvYHkocnstX_uioH-GQKhkGx1L9ydDDIwaarwUDg,21660
344
+ litellm/router_utils/cooldown_callbacks.py,sha256=r1UhQ4MlfPjwiUASIwnL1Fvkrt0zy5NQS4D01-pfNIY,1827
345
+ litellm/router_utils/fallback_event_handlers.py,sha256=mh35Olzu2b7e54ezcPyrxda7iyCneVwjvil_lxK6KxA,1227
346
+ litellm/router_utils/handle_error.py,sha256=5qMHc2XrCnmRsqeEtNdIGFJiYqEiqLtXtfE9Jm9QrVU,1531
347
+ litellm/scheduler.py,sha256=V9-Ucc-E9CzjvTJiM1hBdgks8KjGaT--lOy1-bTNFxM,4476
348
+ litellm/tests/.litellm_cache/cache.db,sha256=baRpphicsnVHsgqvlDbqxqB7hftsK3sAmkMLoJygBxA,32768
349
+ litellm/tests/adroit-crow-413218-bc47f303efc9.json,sha256=usPH8ctWP1zQoyDl2u1u0BIbikk-YJWqHngQUQrA9c4,608
350
+ litellm/tests/azure_fine_tune.jsonl,sha256=dzUe6F_pSfA5b3T1MueC7wcquoJYxBiNV9WcebADRhw,3151
351
+ litellm/tests/conftest.py,sha256=hODXEXJP6x-7xweXZBKzdnljZS7-MpgAW7JxydI-Ouw,1575
352
+ litellm/tests/data_map.txt,sha256=-4_fGs0ltj5Hl3btweqXE3aDxcMi-Dpc8a2iW26aG1A,3939
353
+ litellm/tests/eagle.wav,sha256=TAdGabgjNoGNszBsSuYbOzjqaNczI9XyNf6rMTSafu0,55852
354
+ litellm/tests/example_config_yaml/aliases_config.yaml,sha256=LdCrc0xUjYAa4h0ssz7zjkMXSwUYK2Vv_0LLTNRDzts,1231
355
+ litellm/tests/example_config_yaml/azure_config.yaml,sha256=ulSpfDLCLdiNoTcAjg6ejTms_wuOlseIfYbpHzWCC5U,443
356
+ litellm/tests/example_config_yaml/cache_no_params.yaml,sha256=r0TniRcIZgkzj_vPpkCn5wzRXaWOI8Ct62pCXlhfjIY,127
357
+ litellm/tests/example_config_yaml/cache_with_params.yaml,sha256=EYsKHuqGwI_I6zF294Bq9sYa-KMFAVeLSmXtG7Q18AU,248
358
+ litellm/tests/example_config_yaml/langfuse_config.yaml,sha256=jkBz0zM8bUEBb_gmHi5P0TuFyC0WYlyGa37-WVRdsAo,181
359
+ litellm/tests/example_config_yaml/load_balancer.yaml,sha256=hz5tnS6TvE8P-qU3pZ-SspqMB280EtrSwMZvjEca3sg,886
360
+ litellm/tests/example_config_yaml/opentelemetry_config.yaml,sha256=u7-6jPVmj2Yca7nTeu1ykDZzzdtGKcGj3v5Y557Fc00,192
361
+ litellm/tests/example_config_yaml/simple_config.yaml,sha256=OBODVvCc0814U8-YTmiwT7C4UkSjLN51Bd0HxDenTVg,88
362
+ litellm/tests/gettysburg.wav,sha256=djDa_7Lyjyck2B8f8gOetppfo2DbORlyGncDKljbDUY,775192
363
+ litellm/tests/langfuse.log,sha256=_RcDsToj93hWVmFtgC97cKfNNJYtPiGewDzDLdZFrME,50483
364
+ litellm/tests/large_text.py,sha256=jNJ7qOSbKv7awjMWUvnyjL-x3fa8Uu6tRbR_VYViZtE,24566
365
+ litellm/tests/litellm_uuid.txt,sha256=wsz3BXCEmkNYCru3g5llvm_aimwDqXZ4L4w70Q1XmRQ,36
366
+ litellm/tests/messages_with_counts.py,sha256=SNLS89VExANkR5wHFbXi1WUGbnSQWZA7WC5EaPyy3BI,20196
367
+ litellm/tests/model_cost.json,sha256=G9vH2an8oTeVxoCsrpzuq_-sIDZkav_xUzC6UwOqFWw,37
368
+ litellm/tests/openai_batch_completions.jsonl,sha256=o0X_l2BJ1cMY5Nclmemt2oZUPq3TMd7bpTSfvVHl4H0,512
369
+ litellm/tests/stream_chunk_testdata.py,sha256=BNhI5wfTOagJner4oCvv91HJd6rJdSFyJMPACz9X3v0,16955
370
+ litellm/tests/test_acompletion.py,sha256=sGw8TUWNENtFSj7ouBg1bqfz9OYhRHbI-xIIYKvE478,1059
371
+ litellm/tests/test_acooldowns_router.py,sha256=aCE0F_3DOx4-IinDR4pV6MhV3GbHA4LRZUgaWVATKIA,7026
372
+ litellm/tests/test_add_function_to_prompt.py,sha256=YpA1hc46_ICm8C8UgYZnNECsAk4gcoOGC8jgeSG8MK4,2719
373
+ litellm/tests/test_add_update_models.py,sha256=qiQA5fbTEISRcMcOzWk-L61Ioi1N-H-Mn3rr6v56Xh8,6458
374
+ litellm/tests/test_alangfuse.py,sha256=lewv4YUQQAz7ie4Q5Y6x2P3wW__VgIqeioaaHLjsucc,32791
375
+ litellm/tests/test_alerting.py,sha256=iV42tXjOgCu8qGdTS46_V6Ad_P0MBUnMkYOpQTvLglA,27059
376
+ litellm/tests/test_amazing_s3_logs.py,sha256=XQLgBrx7PcnGfo7oe1oaYOiTMKwJgVglX7cJ-nwXHR0,8509
377
+ litellm/tests/test_amazing_vertex_completion.py,sha256=OtpzvkxLiCG0aB2efz9qLddT4Cr08qMa0vUzjPmQs-o,77276
378
+ litellm/tests/test_anthropic_completion.py,sha256=a--VDKfZv88KbtpEZHaZBXfcqpiNCvtX0dK09EduP4c,15483
379
+ litellm/tests/test_anthropic_prompt_caching.py,sha256=mDiMeksMbiMwJuq-etiWl74VrAkC71DuJQjYomOqhLY,11137
380
+ litellm/tests/test_aproxy_startup.py,sha256=B2ght7sETpuR7ey779uxeRbdNM-TS0upWUW0H3g6fQc,3111
381
+ litellm/tests/test_arize_ai.py,sha256=tDjDN9MTBtCWDXdcoo2guDyRqhFbZHukuqbodOtABrU,720
382
+ litellm/tests/test_assistants.py,sha256=7VihcCFIzmYbLyYO7ZhpE2ZFMuieIgTxYR_dZpAQ2bA,9611
383
+ litellm/tests/test_async_fn.py,sha256=GQ0tlSx7CEAVhonQPKE1KVU1eTElpnW3txnl_Tb7xro,10796
384
+ litellm/tests/test_async_opentelemetry.py,sha256=aGLaHT_KzyjSx2u-ZHfmtPNeFgPHdoGDBde9RtHvOyk,2510
385
+ litellm/tests/test_audio_speech.py,sha256=QPmpEDxcLyH4g-Pb5MS00X8W3PewhTsMxvPvaS1wUns,2865
386
+ litellm/tests/test_auth_checks.py,sha256=SV5fUV8Saylsqv62joCmpEtHpyH6gAQgrfF_kXTWu-s,1993
387
+ litellm/tests/test_azure_content_safety.py,sha256=HdfcdCqmiofvukvBgvhkgNrqbZkMxwEc-QpiGezhRuw,9358
388
+ litellm/tests/test_azure_perf.py,sha256=JyzeufbSBdom-bCNqWm2Rdt_s8qqlFULpQjnNaeGXUY,4050
389
+ litellm/tests/test_bad_params.py,sha256=-hnBLmuw-UDT6SUnvjMHJqJaWyUBk7O4TjdO2NU7sgc,4776
390
+ litellm/tests/test_banned_keyword_list.py,sha256=ufooccsKEhWoNAvlTO6GIZdG2yvQwUpQq-r4SCjNst0,1969
391
+ litellm/tests/test_batch_completion_return_exceptions.py,sha256=HajZCg0M_1kfyoBeamBCSbm3Cdqm1zGV2qJzHazvk1Y,665
392
+ litellm/tests/test_batch_completions.py,sha256=f2tQwHXLb83-zqOdKhCRc2naIMsl6gPD9Qkc7S6T_TQ,2087
393
+ litellm/tests/test_bedrock_completion.py,sha256=3jO7kWBcJduRwLuvHqpiLQyl_JWdx9G2MtBNSizqCxE,43650
394
+ litellm/tests/test_blocked_user_list.py,sha256=W3C0zh7B4PQ8UEecwSlSqUoJ08anjH-0ts2m12UsiGE,4593
395
+ litellm/tests/test_braintrust.py,sha256=_US0Ru-24jGsIG21zSQQ_pHOVkRCF5GTyzlL7xwQLCY,1174
396
+ litellm/tests/test_budget_manager.py,sha256=LukzeqTWLAHJknTOwezGfpxj1Zgxmh9L233VNFkVQjo,5087
397
+ litellm/tests/test_caching.py,sha256=5_Ic5DRYc9_K6Kh21qNOLJAcfLBJ4PC5rIhMKb83IZ4,53148
398
+ litellm/tests/test_caching_ssl.py,sha256=-MCNRgOCwp_Ji58RwfQMkrhGRL8W7Eu6p84HRVJHYuU,3366
399
+ litellm/tests/test_clarifai_completion.py,sha256=ZvZpv3AMocLCwBaigVMHz6BByVaG3USRiGWegkFd9Qw,2867
400
+ litellm/tests/test_class.py,sha256=ZEx56yqsgbXZvsd7xNSYXHZFfA7XZ8o1n1Ga_vruk3Q,3141
401
+ litellm/tests/test_clickhouse_logger.py,sha256=B4dXn1bF2GgcS6VLzFxI9cYPdn0CK7IXysvG4B3dwdc,1055
402
+ litellm/tests/test_cohere_completion.py,sha256=OKwuVTAc7-Fc0kxP6JEEMm9TVXZgPMOiQjyYf6drVu8,7575
403
+ litellm/tests/test_completion.py,sha256=bGMPqDhDzGVc-m4Voh7ctKxs2O3pGSJA3fxd8Ysdpy8,138792
404
+ litellm/tests/test_completion_cost.py,sha256=cgYk65bHW-jYIp8aoOvOTxvFYzH4TfSITVppZ5iiigY,35202
405
+ litellm/tests/test_completion_with_retries.py,sha256=F4V2Y81JeuZ9E2uJZYYF88zHvOCFmbTZoDdWpAnRdx0,1475
406
+ litellm/tests/test_config.py,sha256=Mm9fxgrVugZoEpNIxr8onWPp7lMq58koReBA9hX6RHc,8412
407
+ litellm/tests/test_configs/custom_auth.py,sha256=udRnEgzdTut_bjuXpA7bFP6xjFVuJpVulof_pWxN1mk,679
408
+ litellm/tests/test_configs/custom_callbacks.py,sha256=GPYybpwRV-EqtYwHZGQe_Y4_FQN3IOsC0RTGMJZDQj4,4483
409
+ litellm/tests/test_configs/test_bad_config.yaml,sha256=0r2cBcZlr3uEow6TEOHCzR-1ITuxi-4ZounqYaBhK7M,603
410
+ litellm/tests/test_configs/test_cloudflare_azure_with_cache_config.yaml,sha256=kllqMcmc9lju8KnXmXC6HgHBg6MCNJVB6n6P4Z0rl-s,779
411
+ litellm/tests/test_configs/test_config.yaml,sha256=eKoeKY-hllVq8SVgKIgWQCstsVjKfWikg6cYPIQ8frY,869
412
+ litellm/tests/test_configs/test_config_custom_auth.yaml,sha256=wHVCsrxViy45xUFof76Ao_ZKzHO4kLuIx3UTqczDfBs,216
413
+ litellm/tests/test_configs/test_config_no_auth.yaml,sha256=DNlGwiHczgrFeCz--bhVMN9SJKvHOF8XlZlw69xIrVw,3916
414
+ litellm/tests/test_configs/test_custom_logger.yaml,sha256=sGHWRf-Go-TfMqw-TlQeSeP_ImGIYSA-_G5LbHTLP_M,723
415
+ litellm/tests/test_configs/test_guardrails_config.yaml,sha256=YzDM9bWMmF4MFUl3563BeJ2Qv5TcgbS0VsE9BnAbDf4,879
416
+ litellm/tests/test_cost_calc.py,sha256=AOcd8tnPE7jdJW8psXWMQEL5na2Vu0KG2HED94HWDLI,2866
417
+ litellm/tests/test_custom_api_logger.py,sha256=0FbDI1zvtmM853ROJ6yLDlFShhQgNj6rNywqcYid5j4,1140
418
+ litellm/tests/test_custom_callback_input.py,sha256=Zpf2Nevxr5um3Ds8YZ7TnCiCTF8LNzIZRLuI9cTffJs,46162
419
+ litellm/tests/test_custom_callback_router.py,sha256=vf96FJKKVzU3ajWf-447FBy3uRC20W5zkTLtfB64CjM,27016
420
+ litellm/tests/test_custom_llm.py,sha256=nkyz9F3QtzqEaxlz9TTwPU9ek6TjZf2_AYt9sSep6vU,8778
421
+ litellm/tests/test_custom_logger.py,sha256=KO2tp2_9zYJAoQqtp17aJU5v9NOcYhJ-X5JuE-AhHA4,20233
422
+ litellm/tests/test_datadog.py,sha256=pOCHxQIyLyFCe1UtcCs5dBFfqcXxuKq-FzlsRLMt7W8,604
423
+ litellm/tests/test_deployed_proxy_keygen.py,sha256=V46OXA2ai5qb4Z_--uDYuxRvlIFTrTLWJ2pGCCT4VAs,2075
424
+ litellm/tests/test_dynamic_rate_limit_handler.py,sha256=vGf6D7EQW7TebJiKOmYbte48q0YSGsIufqCZP1rIaqw,16646
425
+ litellm/tests/test_dynamodb_logs.py,sha256=CVtPM8yfxcppNGBGQ4_RH-HSqfvnhyo8TSXpL0Tfo9Q,3850
426
+ litellm/tests/test_embedding.py,sha256=RNVUhVxohD2GeqHedY_M7xpKaBWtvA2uNQTSJiQIEIY,25131
427
+ litellm/tests/test_exceptions.py,sha256=HQj72sw3vBSFF3u-4TX1sfYq4bXP660-GBw-9Pdn-II,28774
428
+ litellm/tests/test_file_types.py,sha256=AWhh4MG5ZXey-62NyvvfXIJub15kUTbeyAogUEIPV28,2267
429
+ litellm/tests/test_fine_tuning_api.py,sha256=KtNlZE3KtbRYoTBc05v_bIoqFT_f5qw00O5Pvh9O-os,9254
430
+ litellm/tests/test_fireworks_ai.py,sha256=jfIsD2reaTyOvnasOkbmYPEm_lX7QESsHszFdteKAI0,958
431
+ litellm/tests/test_function_call_parsing.py,sha256=2tTLgOURuk1mnU0T3ZhzrseIJtGrba_thD0nzHDHpjs,4532
432
+ litellm/tests/test_function_calling.py,sha256=nsa03fvWgYzn_YvYMMAoQom3shUZK2iqoZZAV8kqr7o,13187
433
+ litellm/tests/test_function_setup.py,sha256=4-kKCVlXSDkcMg9zUwl0f554WlIyq9YSAtZ5-WEwKY8,734
434
+ litellm/tests/test_gcs_bucket.py,sha256=Y942HopIJEkLx7XXFRDEr807I8dsHk4oNxr4fSfaKtE,10830
435
+ litellm/tests/test_get_llm_provider.py,sha256=w3z0ofaBMjbAqBDDXq0hfInsl0ydovYqmv-MPKmkCEY,1940
436
+ litellm/tests/test_get_model_file.py,sha256=R2BTCsbA5T3K3ze-AMQXW923aJph-TL6QSWpEBuz8mg,285
437
+ litellm/tests/test_get_model_info.py,sha256=iQWkbueabmcj5XN0UoPYttudMO3xxEhUJ5v6fMPFjeI,1729
438
+ litellm/tests/test_get_model_list.py,sha256=EfSrl6kM5JeUNnO_3qNi-RX6TtkGGf8MRzPdI1sEwGk,253
439
+ litellm/tests/test_get_optional_params_embeddings.py,sha256=TmRFUSxyA8vkIIFi7TPlthIXfyE5h-Hi7H6c-nfoXD0,2051
440
+ litellm/tests/test_get_optional_params_functions_not_supported.py,sha256=tOEV9HXTALvGsAbpMjp-oMeiyqbKed6oW6kTVJpx8gI,926
441
+ litellm/tests/test_google_ai_studio_gemini.py,sha256=XSMVwiokSeDLaonEAt3jpTLevOa9-zws2bxgv_pkwZs,1113
442
+ litellm/tests/test_guardrails_config.py,sha256=6UApgEBo0DdLe3z8wGTugL7DSCFb5rdUZpPBhWbNLEo,2275
443
+ litellm/tests/test_health_check.py,sha256=MC6i8hA5r4zPFJVb5kUHIDh_gR3bPS5L7MbQVgfZa1k,2943
444
+ litellm/tests/test_helicone_integration.py,sha256=2qz_g2DpBJBFUrrHe5rahQmOuyIu8845foICIJZqGWg,3511
445
+ litellm/tests/test_hf_prompt_templates.py,sha256=NEtp_-cHEC0aZGCMBfvcRfbhz1YvFn9oLN2NCzaGrvw,2546
446
+ litellm/tests/test_image_generation.py,sha256=P2Rfl9tM3PFU3Ez9ewMf0LVSeFaXyWD5xyf9VDZ3MIQ,7461
447
+ litellm/tests/test_img_resize.py,sha256=kqW2Z3cdmOicmpMSz5l2-6j7eaYgKggm2Ez7d0BbIsI,2873
448
+ litellm/tests/test_jwt.py,sha256=vi6Il9_wOzmZtT5rGLBi7L1t86UZ4yI6b-TzuGGyXkw,26872
449
+ litellm/tests/test_key_generate_dynamodb.py,sha256=2H52x6RFvN_XcR1OKJ72yKWGEU71apN5z5sFazkxqo0,20093
450
+ litellm/tests/test_key_generate_prisma.py,sha256=TTyyWtM5WWQy0KROBcQoN-B2-YeftKEImQH9tZi03Sk,102714
451
+ litellm/tests/test_lakera_ai_prompt_injection.py,sha256=uRvHa9NoIOnD8icWGwIKj421Rw7wsSO_vdsCKWbDAw4,13730
452
+ litellm/tests/test_langchain_ChatLiteLLM.py,sha256=kapU65kUxf1U1K8QJ7ny6J5CGncDLLYd02mku2XsPK0,2796
453
+ litellm/tests/test_langsmith.py,sha256=uoiUlNZ0JzF8ABeD5bu5as7DFQtADKUh9XEYGevnI6g,6183
454
+ litellm/tests/test_least_busy_routing.py,sha256=0KECP868dA_08ImKPckX6tWcC5BPjyxOxpAsYthgpjs,8676
455
+ litellm/tests/test_litellm_max_budget.py,sha256=je8LIJbKm5-VCk1lAld_2hTgzfPMflgCQPxcDDO56ZQ,1113
456
+ litellm/tests/test_llm_guard.py,sha256=36qOYyOsvfcQNl9HCWR-uq-cplaM36_9Uzi1CICo8Hs,4546
457
+ litellm/tests/test_load_test_router_s3.py,sha256=aes22PJe9Hd_KUa-8MqJDK68lObVVS1Wv6nKdZq7O1E,3042
458
+ litellm/tests/test_loadtest_router.py,sha256=AFJyZyWNBaCIhz-46ZHMZXnTRIfUbBVaMq35O_ECy-E,2831
459
+ litellm/tests/test_logfire.py,sha256=SGVs3OtyEYl1N51pIPNDeNjRZYmMZ3C88xVO-HkWqqI,1998
460
+ litellm/tests/test_logging.py,sha256=Nzv-pyERvVaYSYYd_ifth0tCy6o9deIxBkwv1zNw0mI,13682
461
+ litellm/tests/test_longer_context_fallback.py,sha256=84kK5QeafD3CjXGro4U5tOhSZj2v9cyDgynBGniMcXU,324
462
+ litellm/tests/test_lowest_cost_routing.py,sha256=I0_92uS1OXPz6s-CTSsv3vm0CKOEKQ0g2L4q10BMFf8,6288
463
+ litellm/tests/test_lowest_latency_routing.py,sha256=ukCHn1m5LLLTq2IHdHGMkO6zBSiqW6KjyO2lFOT0KRw,29987
464
+ litellm/tests/test_lunary.py,sha256=bUDusFAiMHT-KuZvrB2i-XGEnGbjct58riaLjXNwIsI,3187
465
+ litellm/tests/test_max_tpm_rpm_limiter.py,sha256=9-ZkpfphcNLkvKJ7ZzkBN3YOBfC-vNzlTw8hhGIQU9E,5234
466
+ litellm/tests/test_mem_usage.py,sha256=WYM-TrNEs-1_XPSZ-gZzl-LqaZ_bwTcPcQjoNNYze-c,4438
467
+ litellm/tests/test_mock_request.py,sha256=6zhrKY6mS0FUbXW79I9Gwah7GmJi_mq1pzbkcYHormQ,2789
468
+ litellm/tests/test_model_alias_map.py,sha256=epVVqwnI-JFL67A8sOfRPfi_Dq3vdq5eMKmjtZ3NDRo,1140
469
+ litellm/tests/test_model_max_token_adjust.py,sha256=Z64EssBLYSL-U8mUuV06FSYpxBjG13CozA00A9eiSXQ,771
470
+ litellm/tests/test_model_response_typing/server.py,sha256=TZUoo_zMl-6l-RX3OX_U0ESQvKd7x3iW5KS8bsoUm1E,626
471
+ litellm/tests/test_model_response_typing/test.py,sha256=cTofJzFIDczLwLVeEq7uOt7I3lnwkbm6ayIFbwiGgJo,495
472
+ litellm/tests/test_multiple_deployments.py,sha256=nMYKiALlVOEwbgzszijl_nTff5ksFGvHKrtM9bSMCWU,1643
473
+ litellm/tests/test_ollama.py,sha256=uhsQrEyjDCihbAa7ECm70PdqL--ulnVEWrHbdqUXRpw,3651
474
+ litellm/tests/test_ollama_local.py,sha256=wi44ljEQ7J0DcB0eZbDNAK0fFk3xp0Fm58Va7qeu9qk,15508
475
+ litellm/tests/test_ollama_local_chat.py,sha256=trhkw-ZpxcaC6dejel2hU1HrrrQBJDU74EwhyPikKi4,14951
476
+ litellm/tests/test_openai_batches_and_files.py,sha256=1AxCKfecQzfXvmu2k6Juxy3ze6MPpz1tdrOwVoXVO90,5923
477
+ litellm/tests/test_openai_moderations_hook.py,sha256=PZYOADhQLFZg0vDBBowzcOvKCJt8u68kIX42tJDRMZI,2162
478
+ litellm/tests/test_optional_params.py,sha256=ngm9Nxmz2T6sH5k3pz8MeqPDUkykmkiKw8LL7Ih7GP4,14467
479
+ litellm/tests/test_parallel_request_limiter.py,sha256=mWFWby3rpy40iJLOL7KqQWmgZ4D7_PBRN22Xd-bAW_0,35831
480
+ litellm/tests/test_pass_through_endpoints.py,sha256=xuIijwzVogrfwTw-sFZSedTEV8DWxz2dxAWZniGpEn8,11535
481
+ litellm/tests/test_presidio_masking.py,sha256=saDN_vZEtVmg4DTmUpocSD2_SbLy5C-c4b1ln6Qg58w,9042
482
+ litellm/tests/test_profiling_router.py,sha256=wBUfRXJo4o0XQmoIY1YK_wZeMPuXfSnSXPlg_hq0gFQ,5335
483
+ litellm/tests/test_prometheus.py,sha256=mf7IiWnTAtLuySNoTZrCF1KSJX7aAJsc-z40DhaTJRE,2546
484
+ litellm/tests/test_prometheus_service.py,sha256=HAlYs_349HJxCt10v4dZujxNTdjmHV848UzAfuk-3sw,4319
485
+ litellm/tests/test_prompt_factory.py,sha256=ksMgDy8E_ISOCvzMSEa6N30fi4JiFQ92HNozszMaprw,12869
486
+ litellm/tests/test_prompt_injection_detection.py,sha256=_EpHsnKvyVjMFWkK5Qs7OqW5lF8MzIASA_oTh3VLSHw,4441
487
+ litellm/tests/test_promptlayer_integration.py,sha256=SW6DcI1od1o4XKmCISU5QmrBV2RPGbefS4n4S8pF_Hk,3288
488
+ litellm/tests/test_provider_specific_config.py,sha256=Njr02kQTgxMYZcKHBaRdKA1ZOmzNpQYoSiFJ9sB2cm4,27043
489
+ litellm/tests/test_proxy_custom_auth.py,sha256=PyoAyFuh2QJWEJ2gBlVdbiyqvDO68iBKAOK30b8b_yc,3058
490
+ litellm/tests/test_proxy_custom_logger.py,sha256=0Zm8bef8shEEyo5iLiA0-pes2TZOsjnghsbL-eV0QGM,10705
491
+ litellm/tests/test_proxy_encrypt_decrypt.py,sha256=Xg1Gt1Kq6rpRzWIi6EEMMYMuhVFqWXZyaXwtElWHclc,1513
492
+ litellm/tests/test_proxy_exception_mapping.py,sha256=Q6StwyBpgf8Sazoge8uy3Jv40JhNpjIbwc_35SpdGBQ,10934
493
+ litellm/tests/test_proxy_gunicorn.py,sha256=5uHJ7xJDjrxvEnJ_7y_g5qbqWgI8tr2LEhdzrPZLKSY,2077
494
+ litellm/tests/test_proxy_pass_user_config.py,sha256=7xzdKfvsLrYtZCZfCyRoWtI88kvVPgd7KX_QJ-Ezy3A,3502
495
+ litellm/tests/test_proxy_reject_logging.py,sha256=3PVx42s0JTnDfzFjyb0S2ecJ9BencG_rT9wa_xVPKWc,5228
496
+ litellm/tests/test_proxy_routes.py,sha256=k7BSKIWpEPufWPlt_9RubIkX78TNZkpD2pdOjWIDZNg,4503
497
+ litellm/tests/test_proxy_server.py,sha256=66hJsCEXQlwz5NH2rMDvj1_ZE_qUY1ui8wgqlj9sfi4,39332
498
+ litellm/tests/test_proxy_server_caching.py,sha256=_77xqgmFtdrvkwbTgPK6vmHP0RAGU6aj_zgdNTEk0fQ,3276
499
+ litellm/tests/test_proxy_server_cost.py,sha256=xvp0zhjwvtzcZTmxAvwmzzBfqTEduwZe39X_DBSRAZ8,4750
500
+ litellm/tests/test_proxy_server_keys.py,sha256=t_E-h3XpSOx-JmPKezeT2tXkkHWZ0lYW3RU1LEc8ELQ,9545
501
+ litellm/tests/test_proxy_server_langfuse.py,sha256=jnQq6x2v0VMrMSlRlO9AxndNHgVWT4tdvLZMJ-LdNbY,2514
502
+ litellm/tests/test_proxy_server_spend.py,sha256=JPW7Fs4a8vU3J2lCraJfV6eLdpzfWGV0MNyvmAY1N2Y,2501
503
+ litellm/tests/test_proxy_setting_guardrails.py,sha256=el0phiXreqDrtlrI_twzZL-J3uJ1IOcB6uTAVjcE6Lo,1943
504
+ litellm/tests/test_proxy_token_counter.py,sha256=49AtDQb1OKo_h6NsZ9ibEDsW-GGoQBcTqqMPoSmjBPY,3543
505
+ litellm/tests/test_proxy_utils.py,sha256=hhxYr9hb608HFq4KXPHMDtgXWxHLhBfY349NcgI3W_c,3073
506
+ litellm/tests/test_pydantic.py,sha256=pawjEkMf8qLXLVYaMGZ6bNKoFIIQRZQpmYs0JBiK2pw,1579
507
+ litellm/tests/test_pydantic_namespaces.py,sha256=lqJyzMqEqnruYPPH42EV_YAtgLYMyTgzZouPGYocpi4,469
508
+ litellm/tests/test_python_38.py,sha256=H0VyVx2fazjLXgsdZyE1zg9Z1g_biwCXObvFSPHf4aQ,2279
509
+ litellm/tests/test_register_model.py,sha256=Y7xcALoGS6Ju5DFa98Yt8l6qPFm0umDvX7erzHYTEaA,1921
510
+ litellm/tests/test_router.py,sha256=41gLzfWLRXdEsegeZgYZHyUjBZ_iOL376SRavlpZQHQ,71634
511
+ litellm/tests/test_router_batch_completion.py,sha256=eGo8KsVbSzmYT4TOESWzRFpj-UwdloFiasVfexbZAvc,5864
512
+ litellm/tests/test_router_caching.py,sha256=z7E1Zs5inphbO3Mhg5nabF3OXDerjoooVEDXcg0bSIo,10811
513
+ litellm/tests/test_router_client_init.py,sha256=lgHcK0MT4EFRCKW2Dow9sFTssJbembI6dpcyU4xDiwA,2385
514
+ litellm/tests/test_router_cooldowns.py,sha256=TkKyea5W_ERiwe91AwuMeD-iocjjig5Dj_eBVKqKlSU,2781
515
+ litellm/tests/test_router_custom_routing.py,sha256=vRHi_UJL3XVB5D6n3d4wRMFZDCfpqJxWRDPuhF9F1V8,5141
516
+ litellm/tests/test_router_debug_logs.py,sha256=HVhEkrzpRl7DOBriCdwe9EWWwrr8M-XjCxKLdDmhCZQ,3245
517
+ litellm/tests/test_router_fallbacks.py,sha256=0mWDuXcR-aYI0NIrIxhSJaNiblPm8PphudWEC86J_o8,43623
518
+ litellm/tests/test_router_get_deployments.py,sha256=a-igGS5P_CqANKNCADOJMSqYwZ4UcwKBQXpYG_quamc,21075
519
+ litellm/tests/test_router_init.py,sha256=KyDenTjpRcCeOtk1XDkUXldhAvJg0txxTQaOJmfqkg4,21745
520
+ litellm/tests/test_router_max_parallel_requests.py,sha256=1nPeseBcoOWb2_ph_DbgkuiR2TYrsPvLUoQv8oTfp54,3659
521
+ litellm/tests/test_router_policy_violation.py,sha256=L6LebYL03cal-VV-r3MNPBxnimXtPAwFOaJIQupYR-U,4721
522
+ litellm/tests/test_router_retries.py,sha256=4HUzE2--14hZSyXKa2lzOnAL_eJb9IgExtx_myi1Zk0,18650
523
+ litellm/tests/test_router_tag_routing.py,sha256=HaRScjQhSUCEkT_SOEncfCj_sX9T2DM-dfBwNybiQzw,2603
524
+ litellm/tests/test_router_timeout.py,sha256=IqOJzrOlMwAHwjhLrLYymeOoEi3jDEm664FUbgER4NU,3789
525
+ litellm/tests/test_router_utils.py,sha256=9a5B69qzBB1cLw0Wm-uB0FXWhPH2U-9L7re4y7eQJLc,3024
526
+ litellm/tests/test_router_with_fallbacks.py,sha256=SgDOn464fDOwwAW-8XfsvGgRaNWHw6IXW3rMbBmAtxI,1704
527
+ litellm/tests/test_rules.py,sha256=OxlwkWvmNHFZRTQv74Qm_ewnCeip7KsPR36OQLelI-8,4573
528
+ litellm/tests/test_sagemaker.py,sha256=mqThmBOPQyIqZjZssnM6HYG4LSt1y3Ss7j8iMYP0Jj8,10377
529
+ litellm/tests/test_scheduler.py,sha256=NZxqGtR5KvsGUhVGYLnsTNwbr_Z3lLvjPAQzPC7OqsY,2669
530
+ litellm/tests/test_secret_detect_hook.py,sha256=AVdaGz4IRKKWovDjY8-st3UArPM-pS-3AXbcKPWrGkE,9257
531
+ litellm/tests/test_secret_manager.py,sha256=Da3YnLGbzbXQerjLDxntJn1sqLLJAuLnxyyd1mf6nMU,6182
532
+ litellm/tests/test_simple_shuffle.py,sha256=9BvKKOxa52NbQcqqAemgH8aVYBO0xK89d_DUgPrYuZE,1338
533
+ litellm/tests/test_spend_calculate_endpoint.py,sha256=Lbp6cb1-LdAc-hHuCxx2hXhMZXGCXDv4LmcJqS1kAFA,3830
534
+ litellm/tests/test_spend_logs.py,sha256=dNXRjoQLvSMMMHO_-KOPNg1fY5o5p4BXQPSOrBHohfo,12832
535
+ litellm/tests/test_stream_chunk_builder.py,sha256=iWGZNHbcRXqgzcRs1ScrwSw--JAnkgiFkc38aJAM9SI,6751
536
+ litellm/tests/test_streaming.py,sha256=qB4WSqHDKTUQZ-ZivsjRD6O8ZL8tpKpg5PCYPrLnvjM,130734
537
+ litellm/tests/test_supabase_integration.py,sha256=Q3NPcTRvYUH7z-DkGOHPGliijQpzjt-wnGGqUhemXaM,2084
538
+ litellm/tests/test_team_config.py,sha256=wsrpHa2C0QaiWFuWqLBJG1_XhM713Y257AN55YeClOo,1095
539
+ litellm/tests/test_text_completion.py,sha256=_cVtQGxWAx13FOJoYdXaEIvpNMKPrLoHobNQbfbKwBo,88234
540
+ litellm/tests/test_timeout.py,sha256=rhT-0M-yQv9_F7Nzyk5lFIzIqmdGgcCi7ts7gokt5NI,8548
541
+ litellm/tests/test_together_ai.py,sha256=ITgIYYEVVCj6bo06bBthmOvAvZ1wZ-ImWdmmLeRPBA8,2046
542
+ litellm/tests/test_token_counter.py,sha256=xz2nZ-cqtm64Z-1LJXlj81m5m5scuDgtqYJOk_gPo4c,14943
543
+ litellm/tests/test_tpm_rpm_routing_v2.py,sha256=OavM6EBgApkgPuHPnSL7uQ_A_IRMUixLQ2RKuw_aDDQ,13629
544
+ litellm/tests/test_traceloop.py,sha256=bRI1Q7nKgz3gDnZ_TsXKowp9mKkwOecwSZTgn3eW9xg,855
545
+ litellm/tests/test_triton.py,sha256=VEbBkutgoaXGM1-J-w2kU0B3zmvl12_yjb7Lhfj98D8,845
546
+ litellm/tests/test_update_spend.py,sha256=KNk22PJc3RcVw3Ig4qZUSJ7jOqJ3jt-HMyqLMBrcThk,2972
547
+ litellm/tests/test_user_api_key_auth.py,sha256=oKuuilrO6NtCj3hQwcrvu9EyY2usUV4dvnhAqxdF5zk,6985
548
+ litellm/tests/test_utils.py,sha256=tWHN2wMTvO7iB89mjs9if1Rtx5DIPEBp3T5680bND-E,24588
549
+ litellm/tests/test_validate_environment.py,sha256=WHTVUmjJsjV83IShZxkM46kWXFhyNQQ0viTFj6nafFI,297
550
+ litellm/tests/test_wandb.py,sha256=BKvRTVfLGed_2XVeNCXrz2JlMsgzqG8cb8h6Oi86anQ,1784
551
+ litellm/tests/test_whisper.py,sha256=SquJQoF5uyiXnfRY8GKUntiu2DxqD2Jkp9pkoDkDuwc,5631
552
+ litellm/tests/user_cost.json,sha256=1qlG-BK_Gi55K5QZdyQJ_EW8Srfhvrgj6t1BlDanQt4,208
553
+ litellm/tests/vertex_ai.jsonl,sha256=2APsDSMZLO45Jqi1yg_f0c2B5S0wrQ_wvz8RCRWOTz4,654
554
+ litellm/tests/vertex_key.json,sha256=usPH8ctWP1zQoyDl2u1u0BIbikk-YJWqHngQUQrA9c4,608
555
+ litellm/timeout.py,sha256=x_Rxjhmn08v3rh_m-vcf0hYo4BS8jRuwSEsjYtbR61Q,4319
556
+ litellm/types/adapter.py,sha256=VLUBbZaxxVWPTzOVbks1x4xSm1icGSW3b7abid_I3y0,222
557
+ litellm/types/completion.py,sha256=KmlFzmPxZUuMpg_RtYxOvVsK9OoZi9VtWtUBoRvOHbg,5869
558
+ litellm/types/embedding.py,sha256=-I4LM4kGCRwNtw0SiSngM8OePTRnrIjIiwNfwGY2slg,615
559
+ litellm/types/files.py,sha256=nhTV3IKYO1XoVxuyWJ7_qr96cAOlG0QuZmnYGWU883A,7202
560
+ litellm/types/guardrails.py,sha256=oz_0qcABCzxEY5d90-9hOVbwl2vSnod5kXOBZsBerMk,1678
561
+ litellm/types/llms/anthropic.py,sha256=C-QAHxEHIaqEWdul95zz6pREe6miMK32z5S4mZVBrvY,6749
562
+ litellm/types/llms/bedrock.py,sha256=KoGL-JGHS5C6CbQqmQEzVabNHmKrVhqmd7iXFqrpjq4,4546
563
+ litellm/types/llms/cohere.py,sha256=68RmYVreF11AOBiHRIxOFdIKGpp1Ljp_gue4uwiphS8,1001
564
+ litellm/types/llms/custom_llm.py,sha256=BQiianU1zrlBtLpjB6zaXe5-xeVZrsRZNb25go4gnqI,220
565
+ litellm/types/llms/databricks.py,sha256=31TnMzir4nkDYFWuUhpI5aTDzZlg_1npPqQvZfkOjOs,493
566
+ litellm/types/llms/openai.py,sha256=WbFdhQTQQuxzSo0M4yKL0bV4TYOaZHN-Dv1itOljc_0,13702
567
+ litellm/types/llms/vertex_ai.py,sha256=rDvSfLIdegNQJlpXhFPEJMliFwsUDFirhE8rbdjaHWg,6855
568
+ litellm/types/router.py,sha256=IBVNLSmyRZnyjK7VmhpsJl8ZNv2N5Koae4yWu2nCXZw,19141
569
+ litellm/types/services.py,sha256=4gqWoMDYvFHjebAhTvW8ytt9QfpmrvLNDncfi4FyAIo,1008
570
+ litellm/types/utils.py,sha256=xuEwz_lFPs97ebtwqOEJk5kJd3MzJQRT_CencgTeeok,36546
571
+ litellm/utils.py,sha256=F8jNrTMREfORWmUEkxpbIpNCZ3XVcJ18PABCOcf-Cxo,500678
572
+ litellm-1.43.18.dist-info/LICENSE,sha256=sXDWv46INd01fgEWgdsCj01R4vsOqJIFj1bgH7ObgnM,1419
573
+ litellm-1.43.18.dist-info/METADATA,sha256=O31V_WebyjzPiWyLsPGTqdPXn1cL0F4sc26T-BaPd5k,32293
574
+ litellm-1.43.18.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
575
+ litellm-1.43.18.dist-info/entry_points.txt,sha256=FGIGsq4hBWP2nfWEtKPIwxv67GXhoegZK_AF2oK447M,46
576
+ litellm-1.43.18.dist-info/INSTALLER,sha256=HLHRd3rVxZqLVn0Nby492_jJUNACT5LifwfFYrwaW0E,12
577
+ litellm-1.43.18.dist-info/RECORD,,
.venv/lib/python3.12/site-packages/litellm-1.43.18.dist-info/WHEEL ADDED
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: poetry-core 1.9.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
.venv/lib/python3.12/site-packages/litellm-1.43.18.dist-info/entry_points.txt ADDED
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ litellm=litellm:run_server
3
+
.venv/lib/python3.12/site-packages/litellm/cost_calculator.py CHANGED
@@ -412,7 +412,7 @@ def get_replicate_completion_pricing(completion_response=None, total_time=0.0):
412
 
413
  def _select_model_name_for_cost_calc(
414
  model: Optional[str],
415
- completion_response: Union[BaseModel, dict],
416
  base_model: Optional[str] = None,
417
  custom_pricing: Optional[bool] = None,
418
  ) -> Optional[str]:
@@ -428,7 +428,12 @@ def _select_model_name_for_cost_calc(
428
  if base_model is not None:
429
  return base_model
430
 
431
- return_model = model or completion_response.get("model", "") # type: ignore
 
 
 
 
 
432
  if hasattr(completion_response, "_hidden_params"):
433
  if (
434
  completion_response._hidden_params.get("model", None) is not None
@@ -660,7 +665,7 @@ def completion_cost(
660
 
661
  if custom_llm_provider is not None and custom_llm_provider == "vertex_ai":
662
  # Calculate the prompt characters + response characters
663
- if len("messages") > 0:
664
  prompt_string = litellm.utils.get_formatted_prompt(
665
  data={"messages": messages}, call_type="completion"
666
  )
 
412
 
413
  def _select_model_name_for_cost_calc(
414
  model: Optional[str],
415
+ completion_response: Union[BaseModel, dict, str],
416
  base_model: Optional[str] = None,
417
  custom_pricing: Optional[bool] = None,
418
  ) -> Optional[str]:
 
428
  if base_model is not None:
429
  return base_model
430
 
431
+ return_model = model
432
+ if isinstance(completion_response, str):
433
+ return return_model
434
+
435
+ elif return_model is None:
436
+ return_model = completion_response.get("model", "") # type: ignore
437
  if hasattr(completion_response, "_hidden_params"):
438
  if (
439
  completion_response._hidden_params.get("model", None) is not None
 
665
 
666
  if custom_llm_provider is not None and custom_llm_provider == "vertex_ai":
667
  # Calculate the prompt characters + response characters
668
+ if len(messages) > 0:
669
  prompt_string = litellm.utils.get_formatted_prompt(
670
  data={"messages": messages}, call_type="completion"
671
  )
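Note on the cost_calculator.py change above: completion_response can now be a plain string (for example a raw text completion), in which case the passed-in model name is used directly, and the len("messages") typo is corrected to len(messages). A rough standalone sketch of the selection order after this change (illustrative only; it omits the pydantic and _hidden_params handling the packaged function also performs):

    from typing import Optional, Union

    def select_model_name(
        model: Optional[str],
        completion_response: Union[dict, str],
        base_model: Optional[str] = None,
    ) -> Optional[str]:
        if base_model is not None:
            return base_model                      # explicit base_model always wins
        return_model = model
        if isinstance(completion_response, str):
            return return_model                    # raw text carries no model field
        if return_model is None:
            return_model = completion_response.get("model", "")
        return return_model

    print(select_model_name(None, {"model": "gpt-3.5-turbo"}))  # -> gpt-3.5-turbo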
.venv/lib/python3.12/site-packages/litellm/integrations/prometheus.py CHANGED
@@ -103,13 +103,30 @@ class PrometheusLogger(CustomLogger):
103
  "Remaining budget for api key",
104
  labelnames=["hashed_api_key", "api_key_alias"],
105
  )
106
-
107
- ########################################
108
- # LLM API Deployment Metrics / analytics
109
- ########################################
110
-
111
  # Litellm-Enterprise Metrics
112
  if premium_user is True:
 
 
 
 
 
 
113
  # Remaining Rate Limit for model
114
  self.litellm_remaining_requests_metric = Gauge(
115
  "litellm_remaining_requests",
@@ -187,6 +204,9 @@ class PrometheusLogger(CustomLogger):
187
 
188
  async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
189
  # Define prometheus client
 
 
 
190
  from litellm.proxy.proxy_server import premium_user
191
 
192
  verbose_logger.debug(
@@ -197,6 +217,7 @@ class PrometheusLogger(CustomLogger):
197
  model = kwargs.get("model", "")
198
  response_cost = kwargs.get("response_cost", 0.0) or 0
199
  litellm_params = kwargs.get("litellm_params", {}) or {}
 
200
  proxy_server_request = litellm_params.get("proxy_server_request") or {}
201
  end_user_id = proxy_server_request.get("body", {}).get("user", None)
202
  user_id = litellm_params.get("metadata", {}).get("user_api_key_user_id", None)
@@ -286,6 +307,27 @@ class PrometheusLogger(CustomLogger):
286
  user_api_key, user_api_key_alias
287
  ).set(_remaining_api_key_budget)
288
 
 
 
 
 
 
 
289
  # set x-ratelimit headers
290
  if premium_user is True:
291
  self.set_llm_deployment_success_metrics(
 
103
  "Remaining budget for api key",
104
  labelnames=["hashed_api_key", "api_key_alias"],
105
  )
 
 
 
 
 
106
  # Litellm-Enterprise Metrics
107
  if premium_user is True:
108
+
109
+ ########################################
110
+ # LiteLLM Virtual API KEY metrics
111
+ ########################################
112
+ # Remaining MODEL RPM limit for API Key
113
+ self.litellm_remaining_api_key_requests_for_model = Gauge(
114
+ "litellm_remaining_api_key_requests_for_model",
115
+ "Remaining Requests API Key can make for model (model based rpm limit on key)",
116
+ labelnames=["hashed_api_key", "api_key_alias", "model"],
117
+ )
118
+
119
+ # Remaining MODEL TPM limit for API Key
120
+ self.litellm_remaining_api_key_tokens_for_model = Gauge(
121
+ "litellm_remaining_api_key_tokens_for_model",
122
+ "Remaining Tokens API Key can make for model (model based tpm limit on key)",
123
+ labelnames=["hashed_api_key", "api_key_alias", "model"],
124
+ )
125
+
126
+ ########################################
127
+ # LLM API Deployment Metrics / analytics
128
+ ########################################
129
+
130
  # Remaining Rate Limit for model
131
  self.litellm_remaining_requests_metric = Gauge(
132
  "litellm_remaining_requests",
 
204
 
205
  async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
206
  # Define prometheus client
207
+ from litellm.proxy.common_utils.callback_utils import (
208
+ get_model_group_from_litellm_kwargs,
209
+ )
210
  from litellm.proxy.proxy_server import premium_user
211
 
212
  verbose_logger.debug(
 
217
  model = kwargs.get("model", "")
218
  response_cost = kwargs.get("response_cost", 0.0) or 0
219
  litellm_params = kwargs.get("litellm_params", {}) or {}
220
+ _metadata = litellm_params.get("metadata", {})
221
  proxy_server_request = litellm_params.get("proxy_server_request") or {}
222
  end_user_id = proxy_server_request.get("body", {}).get("user", None)
223
  user_id = litellm_params.get("metadata", {}).get("user_api_key_user_id", None)
 
307
  user_api_key, user_api_key_alias
308
  ).set(_remaining_api_key_budget)
309
 
310
+ # Set remaining rpm/tpm for API Key + model
311
+ # see parallel_request_limiter.py - variables are set there
312
+ model_group = get_model_group_from_litellm_kwargs(kwargs)
313
+ remaining_requests_variable_name = (
314
+ f"litellm-key-remaining-requests-{model_group}"
315
+ )
316
+ remaining_tokens_variable_name = f"litellm-key-remaining-tokens-{model_group}"
317
+
318
+ remaining_requests = _metadata.get(
319
+ remaining_requests_variable_name, sys.maxsize
320
+ )
321
+ remaining_tokens = _metadata.get(remaining_tokens_variable_name, sys.maxsize)
322
+
323
+ self.litellm_remaining_api_key_requests_for_model.labels(
324
+ user_api_key, user_api_key_alias, model_group
325
+ ).set(remaining_requests)
326
+
327
+ self.litellm_remaining_api_key_tokens_for_model.labels(
328
+ user_api_key, user_api_key_alias, model_group
329
+ ).set(remaining_tokens)
330
+
331
  # set x-ratelimit headers
332
  if premium_user is True:
333
  self.set_llm_deployment_success_metrics(
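Note on the prometheus.py change above: the two new gauges are labelled by hashed API key, key alias and model, and their values come from request metadata keys of the form litellm-key-remaining-requests-{model_group} that parallel_request_limiter.py writes. A minimal standalone sketch of that shape (assumes the prometheus_client package; the key, alias and model values below are made up):

    import sys
    from prometheus_client import Gauge

    remaining_requests_gauge = Gauge(
        "litellm_remaining_api_key_requests_for_model",
        "Remaining Requests API Key can make for model (model based rpm limit on key)",
        labelnames=["hashed_api_key", "api_key_alias", "model"],
    )

    # missing metadata falls back to sys.maxsize so the gauge never reports a false zero
    metadata = {"litellm-key-remaining-requests-gpt-4": 42}
    remaining = metadata.get("litellm-key-remaining-requests-gpt-4", sys.maxsize)
    remaining_requests_gauge.labels("hashed-key", "my-key-alias", "gpt-4").set(remaining)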
.venv/lib/python3.12/site-packages/litellm/litellm_core_utils/litellm_logging.py CHANGED
@@ -274,6 +274,7 @@ class Logging:
274
  headers = {}
275
  data = additional_args.get("complete_input_dict", {})
276
  api_base = str(additional_args.get("api_base", ""))
 
277
  if "key=" in api_base:
278
  # Find the position of "key=" in the string
279
  key_index = api_base.find("key=") + 4
@@ -2320,7 +2321,7 @@ def get_standard_logging_object_payload(
2320
  model_map_value=_model_cost_information,
2321
  )
2322
  except Exception:
2323
- verbose_logger.warning(
2324
  "Model is not mapped in model cost map. Defaulting to None model_cost_information for standard_logging_payload"
2325
  )
2326
  model_cost_information = StandardLoggingModelInformation(
@@ -2362,7 +2363,7 @@ def get_standard_logging_object_payload(
2362
 
2363
  return payload
2364
  except Exception as e:
2365
- verbose_logger.error(
2366
  "Error creating standard logging object - {}".format(str(e))
2367
  )
2368
  return None
 
274
  headers = {}
275
  data = additional_args.get("complete_input_dict", {})
276
  api_base = str(additional_args.get("api_base", ""))
277
+ query_params = additional_args.get("query_params", {})
278
  if "key=" in api_base:
279
  # Find the position of "key=" in the string
280
  key_index = api_base.find("key=") + 4
 
2321
  model_map_value=_model_cost_information,
2322
  )
2323
  except Exception:
2324
+ verbose_logger.debug( # keep in debug otherwise it will trigger on every call
2325
  "Model is not mapped in model cost map. Defaulting to None model_cost_information for standard_logging_payload"
2326
  )
2327
  model_cost_information = StandardLoggingModelInformation(
 
2363
 
2364
  return payload
2365
  except Exception as e:
2366
+ verbose_logger.exception(
2367
  "Error creating standard logging object - {}".format(str(e))
2368
  )
2369
  return None
.venv/lib/python3.12/site-packages/litellm/llms/anthropic.py CHANGED
@@ -1122,6 +1122,7 @@ class ModelResponseIterator:
1122
  self.streaming_response = streaming_response
1123
  self.response_iterator = self.streaming_response
1124
  self.content_blocks: List[ContentBlockDelta] = []
 
1125
 
1126
  def check_empty_tool_call_args(self) -> bool:
1127
  """
@@ -1171,7 +1172,7 @@ class ModelResponseIterator:
1171
  "name": None,
1172
  "arguments": content_block["delta"]["partial_json"],
1173
  },
1174
- "index": content_block["index"],
1175
  }
1176
  elif type_chunk == "content_block_start":
1177
  """
@@ -1183,6 +1184,7 @@ class ModelResponseIterator:
1183
  if content_block_start["content_block"]["type"] == "text":
1184
  text = content_block_start["content_block"]["text"]
1185
  elif content_block_start["content_block"]["type"] == "tool_use":
 
1186
  tool_use = {
1187
  "id": content_block_start["content_block"]["id"],
1188
  "type": "function",
@@ -1190,7 +1192,7 @@ class ModelResponseIterator:
1190
  "name": content_block_start["content_block"]["name"],
1191
  "arguments": "",
1192
  },
1193
- "index": content_block_start["index"],
1194
  }
1195
  elif type_chunk == "content_block_stop":
1196
  content_block_stop = ContentBlockStop(**chunk) # type: ignore
@@ -1204,7 +1206,7 @@ class ModelResponseIterator:
1204
  "name": None,
1205
  "arguments": "{}",
1206
  },
1207
- "index": content_block_stop["index"],
1208
  }
1209
  elif type_chunk == "message_delta":
1210
  """
 
1122
  self.streaming_response = streaming_response
1123
  self.response_iterator = self.streaming_response
1124
  self.content_blocks: List[ContentBlockDelta] = []
1125
+ self.tool_index = -1
1126
 
1127
  def check_empty_tool_call_args(self) -> bool:
1128
  """
 
1172
  "name": None,
1173
  "arguments": content_block["delta"]["partial_json"],
1174
  },
1175
+ "index": self.tool_index,
1176
  }
1177
  elif type_chunk == "content_block_start":
1178
  """
 
1184
  if content_block_start["content_block"]["type"] == "text":
1185
  text = content_block_start["content_block"]["text"]
1186
  elif content_block_start["content_block"]["type"] == "tool_use":
1187
+ self.tool_index += 1
1188
  tool_use = {
1189
  "id": content_block_start["content_block"]["id"],
1190
  "type": "function",
 
1192
  "name": content_block_start["content_block"]["name"],
1193
  "arguments": "",
1194
  },
1195
+ "index": self.tool_index,
1196
  }
1197
  elif type_chunk == "content_block_stop":
1198
  content_block_stop = ContentBlockStop(**chunk) # type: ignore
 
1206
  "name": None,
1207
  "arguments": "{}",
1208
  },
1209
+ "index": self.tool_index,
1210
  }
1211
  elif type_chunk == "message_delta":
1212
  """
.venv/lib/python3.12/site-packages/litellm/llms/prompt_templates/factory.py CHANGED
@@ -1010,6 +1010,9 @@ def convert_to_gemini_tool_call_invoke(
1010
  name = tool["function"].get("name", "")
1011
  arguments = tool["function"].get("arguments", "")
1012
  arguments_dict = json.loads(arguments)
 
 
 
1013
  for k, v in arguments_dict.items():
1014
  inferred_protocol_value = infer_protocol_value(value=v)
1015
  _field = litellm.types.llms.vertex_ai.Field(
@@ -1022,9 +1025,18 @@ def convert_to_gemini_tool_call_invoke(
1022
  name=name,
1023
  args=_fields,
1024
  )
1025
- _parts_list.append(
1026
- litellm.types.llms.vertex_ai.PartType(function_call=function_call)
1027
- )
 
 
 
 
 
 
 
 
 
1028
  return _parts_list
1029
  except Exception as e:
1030
  raise Exception(
 
1010
  name = tool["function"].get("name", "")
1011
  arguments = tool["function"].get("arguments", "")
1012
  arguments_dict = json.loads(arguments)
1013
+ function_call: Optional[litellm.types.llms.vertex_ai.FunctionCall] = (
1014
+ None
1015
+ )
1016
  for k, v in arguments_dict.items():
1017
  inferred_protocol_value = infer_protocol_value(value=v)
1018
  _field = litellm.types.llms.vertex_ai.Field(
 
1025
  name=name,
1026
  args=_fields,
1027
  )
1028
+ if function_call is not None:
1029
+ _parts_list.append(
1030
+ litellm.types.llms.vertex_ai.PartType(
1031
+ function_call=function_call
1032
+ )
1033
+ )
1034
+ else: # don't silently drop params. Make it clear to user what's happening.
1035
+ raise Exception(
1036
+ "function_call missing. Received tool call with 'type': 'function'. No function call in argument - {}".format(
1037
+ tool
1038
+ )
1039
+ )
1040
  return _parts_list
1041
  except Exception as e:
1042
  raise Exception(
.venv/lib/python3.12/site-packages/litellm/llms/vertex_httpx.py CHANGED
@@ -491,6 +491,16 @@ class VertexGeminiConfig:
491
  "SPII": "The token generation was stopped as the response was flagged for Sensitive Personally Identifiable Information (SPII) contents.",
492
  }
493
 
 
 
 
 
 
 
 
 
 
 
494
 
495
  async def make_call(
496
  client: Optional[AsyncHTTPHandler],
@@ -504,8 +514,15 @@ async def make_call(
504
  if client is None:
505
  client = AsyncHTTPHandler() # Create a new client if none provided
506
 
507
- response = await client.post(api_base, headers=headers, data=data, stream=True)
508
-
 
 
 
 
 
 
 
509
  if response.status_code != 200:
510
  raise VertexAIError(status_code=response.status_code, message=response.text)
511
 
 
491
  "SPII": "The token generation was stopped as the response was flagged for Sensitive Personally Identifiable Information (SPII) contents.",
492
  }
493
 
494
+ def translate_exception_str(self, exception_string: str):
495
+ if (
496
+ "GenerateContentRequest.tools[0].function_declarations[0].parameters.properties: should be non-empty for OBJECT type"
497
+ in exception_string
498
+ ):
499
+ return "'properties' field in tools[0]['function']['parameters'] cannot be empty if 'type' == 'object'. Received error from provider - {}".format(
500
+ exception_string
501
+ )
502
+ return exception_string
503
+
504
 
505
  async def make_call(
506
  client: Optional[AsyncHTTPHandler],
 
514
  if client is None:
515
  client = AsyncHTTPHandler() # Create a new client if none provided
516
 
517
+ try:
518
+ response = await client.post(api_base, headers=headers, data=data, stream=True)
519
+ response.raise_for_status()
520
+ except httpx.HTTPStatusError as e:
521
+ exception_string = str(await e.response.aread())
522
+ raise VertexAIError(
523
+ status_code=e.response.status_code,
524
+ message=VertexGeminiConfig().translate_exception_str(exception_string),
525
+ )
526
  if response.status_code != 200:
527
  raise VertexAIError(status_code=response.status_code, message=response.text)
528
 
.venv/lib/python3.12/site-packages/litellm/model_prices_and_context_window_backup.json CHANGED
@@ -4201,6 +4201,15 @@
4201
  "litellm_provider": "ollama",
4202
  "mode": "completion"
4203
  },
 
 
 
 
 
 
 
 
 
4204
  "ollama/llama2:13b": {
4205
  "max_tokens": 4096,
4206
  "max_input_tokens": 4096,
@@ -4237,6 +4246,15 @@
4237
  "litellm_provider": "ollama",
4238
  "mode": "chat"
4239
  },
 
 
 
 
 
 
 
 
 
4240
  "ollama/llama3:70b": {
4241
  "max_tokens": 8192,
4242
  "max_input_tokens": 8192,
 
4201
  "litellm_provider": "ollama",
4202
  "mode": "completion"
4203
  },
4204
+ "ollama/llama2:7b": {
4205
+ "max_tokens": 4096,
4206
+ "max_input_tokens": 4096,
4207
+ "max_output_tokens": 4096,
4208
+ "input_cost_per_token": 0.0,
4209
+ "output_cost_per_token": 0.0,
4210
+ "litellm_provider": "ollama",
4211
+ "mode": "completion"
4212
+ },
4213
  "ollama/llama2:13b": {
4214
  "max_tokens": 4096,
4215
  "max_input_tokens": 4096,
 
4246
  "litellm_provider": "ollama",
4247
  "mode": "chat"
4248
  },
4249
+ "ollama/llama3:8b": {
4250
+ "max_tokens": 8192,
4251
+ "max_input_tokens": 8192,
4252
+ "max_output_tokens": 8192,
4253
+ "input_cost_per_token": 0.0,
4254
+ "output_cost_per_token": 0.0,
4255
+ "litellm_provider": "ollama",
4256
+ "mode": "chat"
4257
+ },
4258
  "ollama/llama3:70b": {
4259
  "max_tokens": 8192,
4260
  "max_input_tokens": 8192,
.venv/lib/python3.12/site-packages/litellm/proxy/_new_secret_config.yaml CHANGED
@@ -1,6 +1,4 @@
1
  model_list:
2
- - model_name: "text-embedding-ada-002"
3
  litellm_params:
4
- model: "azure/azure-embedding-model"
5
- api_base: os.environ/AZURE_API_BASE
6
- api_key: os.environ/AZURE_API_KEY
 
1
  model_list:
2
+ - model_name: "*"
3
  litellm_params:
4
+ model: "*"
 
 
.venv/lib/python3.12/site-packages/litellm/proxy/_types.py CHANGED
@@ -585,6 +585,8 @@ class GenerateKeyRequest(GenerateRequestBase):
585
 
586
  model_config = ConfigDict(protected_namespaces=())
587
  send_invite_email: Optional[bool] = None
 
 
588
 
589
 
590
  class GenerateKeyResponse(GenerateKeyRequest):
@@ -1337,6 +1339,8 @@ class UserAPIKeyAuth(
1337
  ] = None
1338
  allowed_model_region: Optional[Literal["eu"]] = None
1339
  parent_otel_span: Optional[Span] = None
 
 
1340
 
1341
  @model_validator(mode="before")
1342
  @classmethod
 
585
 
586
  model_config = ConfigDict(protected_namespaces=())
587
  send_invite_email: Optional[bool] = None
588
+ model_rpm_limit: Optional[dict] = None
589
+ model_tpm_limit: Optional[dict] = None
590
 
591
 
592
  class GenerateKeyResponse(GenerateKeyRequest):
 
1339
  ] = None
1340
  allowed_model_region: Optional[Literal["eu"]] = None
1341
  parent_otel_span: Optional[Span] = None
1342
+ rpm_limit_per_model: Optional[Dict[str, int]] = None
1343
+ tpm_limit_per_model: Optional[Dict[str, int]] = None
1344
 
1345
  @model_validator(mode="before")
1346
  @classmethod
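Note on the _types.py change above: GenerateKeyRequest now accepts per-model rate limits, and UserAPIKeyAuth carries the parsed rpm_limit_per_model / tpm_limit_per_model mappings. An illustrative request body for the proxy's /key/generate endpoint (the model names and limit values below are made up):

    from litellm.proxy._types import GenerateKeyRequest

    req = GenerateKeyRequest(
        models=["gpt-4", "gpt-3.5-turbo"],
        model_rpm_limit={"gpt-4": 10, "gpt-3.5-turbo": 60},   # requests per minute, per model
        model_tpm_limit={"gpt-4": 10_000},                    # tokens per minute, per model
    )
    print(req.model_rpm_limit)  # {'gpt-4': 10, 'gpt-3.5-turbo': 60}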
.venv/lib/python3.12/site-packages/litellm/proxy/auth/auth_checks.py CHANGED
@@ -10,7 +10,7 @@ Run checks for:
10
  """
11
  import time
12
  from datetime import datetime
13
- from typing import TYPE_CHECKING, Any, Literal, Optional
14
 
15
  import litellm
16
  from litellm._logging import verbose_proxy_logger
@@ -77,6 +77,11 @@ def common_checks(
77
  if "all-proxy-models" in team_object.models:
78
  # this means the team has access to all models on the proxy
79
  pass
 
 
 
 
 
80
  else:
81
  raise Exception(
82
  f"Team={team_object.team_id} not allowed to call model={_model}. Allowed team models = {team_object.models}"
@@ -327,6 +332,39 @@ async def get_end_user_object(
327
  return None
328
 
329
 
 
 
 
 
 
 
 
 
 
 
330
  @log_to_opentelemetry
331
  async def get_user_object(
332
  user_id: str,
@@ -543,12 +581,11 @@ async def can_key_call_model(
543
  )
544
  from collections import defaultdict
545
 
 
 
546
  access_groups = defaultdict(list)
547
- if llm_model_list is not None:
548
- for m in llm_model_list:
549
- for group in m.get("model_info", {}).get("access_groups", []):
550
- model_name = m["model_name"]
551
- access_groups[group].append(model_name)
552
 
553
  models_in_current_access_groups = []
554
  if len(access_groups) > 0: # check if token contains any model access groups
 
10
  """
11
  import time
12
  from datetime import datetime
13
+ from typing import TYPE_CHECKING, Any, List, Literal, Optional
14
 
15
  import litellm
16
  from litellm._logging import verbose_proxy_logger
 
77
  if "all-proxy-models" in team_object.models:
78
  # this means the team has access to all models on the proxy
79
  pass
80
+ # check if the team model is an access_group
81
+ elif model_in_access_group(_model, team_object.models) is True:
82
+ pass
83
+ elif _model and "*" in _model:
84
+ pass
85
  else:
86
  raise Exception(
87
  f"Team={team_object.team_id} not allowed to call model={_model}. Allowed team models = {team_object.models}"
 
332
  return None
333
 
334
 
335
+ def model_in_access_group(model: str, team_models: Optional[List[str]]) -> bool:
336
+ from collections import defaultdict
337
+
338
+ from litellm.proxy.proxy_server import llm_router
339
+
340
+ if team_models is None:
341
+ return True
342
+ if model in team_models:
343
+ return True
344
+
345
+ access_groups = defaultdict(list)
346
+ if llm_router:
347
+ access_groups = llm_router.get_model_access_groups()
348
+
349
+ models_in_current_access_groups = []
350
+ if len(access_groups) > 0: # check if token contains any model access groups
351
+ for idx, m in enumerate(
352
+ team_models
353
+ ): # loop token models, if any of them are an access group add the access group
354
+ if m in access_groups:
355
+ # if it is an access group we need to remove it from valid_token.models
356
+ models_in_group = access_groups[m]
357
+ models_in_current_access_groups.extend(models_in_group)
358
+
359
+ # Filter out models that are access_groups
360
+ filtered_models = [m for m in team_models if m not in access_groups]
361
+ filtered_models += models_in_current_access_groups
362
+
363
+ if model in filtered_models:
364
+ return True
365
+ return False
366
+
367
+
368
  @log_to_opentelemetry
369
  async def get_user_object(
370
  user_id: str,
 
581
  )
582
  from collections import defaultdict
583
 
584
+ from litellm.proxy.proxy_server import llm_router
585
+
586
  access_groups = defaultdict(list)
587
+ if llm_router:
588
+ access_groups = llm_router.get_model_access_groups()
 
 
 
589
 
590
  models_in_current_access_groups = []
591
  if len(access_groups) > 0: # check if token contains any model access groups
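Note on the auth_checks.py change above: a team's model list may now name an access group, and wildcard entries pass. A standalone sketch of the membership rule model_in_access_group implements (the packaged helper reads access groups from the proxy's llm_router; this stand-in takes them as an argument so it runs on its own, and the group and model names are made up):

    from typing import Dict, List, Optional

    def in_access_group(
        model: str,
        team_models: Optional[List[str]],
        access_groups: Dict[str, List[str]],
    ) -> bool:
        if team_models is None:              # no restriction configured
            return True
        if model in team_models:             # direct model grant
            return True
        expanded = [m for m in team_models if m not in access_groups]
        for m in team_models:
            if m in access_groups:           # team entry is an access-group name
                expanded.extend(access_groups[m])
        return model in expanded

    print(in_access_group("gpt-4", ["prod-models"], {"prod-models": ["gpt-4", "gpt-3.5-turbo"]}))  # True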
.venv/lib/python3.12/site-packages/litellm/proxy/auth/auth_utils.py CHANGED
@@ -210,3 +210,20 @@ def bytes_to_mb(bytes_value: int):
210
  Helper to convert bytes to MB
211
  """
212
  return bytes_value / (1024 * 1024)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
210
  Helper to convert bytes to MB
211
  """
212
  return bytes_value / (1024 * 1024)
213
+
214
+
215
+ # helpers used by parallel request limiter to handle model rpm/tpm limits for a given api key
216
+ def get_key_model_rpm_limit(user_api_key_dict: UserAPIKeyAuth) -> Optional[dict]:
217
+ if user_api_key_dict.metadata:
218
+ if "model_rpm_limit" in user_api_key_dict.metadata:
219
+ return user_api_key_dict.metadata["model_rpm_limit"]
220
+
221
+ return None
222
+
223
+
224
+ def get_key_model_tpm_limit(user_api_key_dict: UserAPIKeyAuth) -> Optional[dict]:
225
+ if user_api_key_dict.metadata:
226
+ if "model_tpm_limit" in user_api_key_dict.metadata:
227
+ return user_api_key_dict.metadata["model_tpm_limit"]
228
+
229
+ return None
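Note on the auth_utils.py change above: the two helpers simply read per-model limits off the key's metadata. Illustrative usage (the key and limit values below are made up):

    from litellm.proxy._types import UserAPIKeyAuth
    from litellm.proxy.auth.auth_utils import get_key_model_rpm_limit

    key = UserAPIKeyAuth(
        api_key="sk-example",                        # hypothetical key
        metadata={"model_rpm_limit": {"gpt-4": 10}},
    )
    print(get_key_model_rpm_limit(key))  # {'gpt-4': 10}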
.venv/lib/python3.12/site-packages/litellm/proxy/common_utils/callback_utils.py ADDED
@@ -0,0 +1,297 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ from typing import Any, Dict, List, Optional, get_args
3
+
4
+ import litellm
5
+ from litellm._logging import verbose_proxy_logger
6
+ from litellm.proxy._types import CommonProxyErrors, LiteLLMPromptInjectionParams
7
+ from litellm.proxy.utils import get_instance_fn
8
+
9
+ blue_color_code = "\033[94m"
10
+ reset_color_code = "\033[0m"
11
+
12
+
13
+ def initialize_callbacks_on_proxy(
14
+ value: Any,
15
+ premium_user: bool,
16
+ config_file_path: str,
17
+ litellm_settings: dict,
18
+ callback_specific_params: dict = {},
19
+ ):
20
+ from litellm.proxy.proxy_server import prisma_client
21
+
22
+ verbose_proxy_logger.debug(
23
+ f"{blue_color_code}initializing callbacks={value} on proxy{reset_color_code}"
24
+ )
25
+ if isinstance(value, list):
26
+ imported_list: List[Any] = []
27
+ for callback in value: # ["presidio", <my-custom-callback>]
28
+ if (
29
+ isinstance(callback, str)
30
+ and callback in litellm._known_custom_logger_compatible_callbacks
31
+ ):
32
+ imported_list.append(callback)
33
+ elif isinstance(callback, str) and callback == "otel":
34
+ from litellm.integrations.opentelemetry import OpenTelemetry
35
+ from litellm.proxy import proxy_server
36
+
37
+ open_telemetry_logger = OpenTelemetry()
38
+
39
+ # Add Otel as a service callback
40
+ if "otel" not in litellm.service_callback:
41
+ litellm.service_callback.append("otel")
42
+
43
+ imported_list.append(open_telemetry_logger)
44
+ setattr(proxy_server, "open_telemetry_logger", open_telemetry_logger)
45
+ elif isinstance(callback, str) and callback == "presidio":
46
+ from litellm.proxy.hooks.presidio_pii_masking import (
47
+ _OPTIONAL_PresidioPIIMasking,
48
+ )
49
+
50
+ presidio_logging_only: Optional[bool] = litellm_settings.get(
51
+ "presidio_logging_only", None
52
+ )
53
+ if presidio_logging_only is not None:
54
+ presidio_logging_only = bool(
55
+ presidio_logging_only
56
+ ) # validate boolean given
57
+
58
+ params = {
59
+ "logging_only": presidio_logging_only,
60
+ **callback_specific_params.get("presidio", {}),
61
+ }
62
+ pii_masking_object = _OPTIONAL_PresidioPIIMasking(**params)
63
+ imported_list.append(pii_masking_object)
64
+ elif isinstance(callback, str) and callback == "llamaguard_moderations":
65
+ from enterprise.enterprise_hooks.llama_guard import (
66
+ _ENTERPRISE_LlamaGuard,
67
+ )
68
+
69
+ if premium_user != True:
70
+ raise Exception(
71
+ "Trying to use Llama Guard"
72
+ + CommonProxyErrors.not_premium_user.value
73
+ )
74
+
75
+ llama_guard_object = _ENTERPRISE_LlamaGuard()
76
+ imported_list.append(llama_guard_object)
77
+ elif isinstance(callback, str) and callback == "hide_secrets":
78
+ from enterprise.enterprise_hooks.secret_detection import (
79
+ _ENTERPRISE_SecretDetection,
80
+ )
81
+
82
+ if premium_user != True:
83
+ raise Exception(
84
+ "Trying to use secret hiding"
85
+ + CommonProxyErrors.not_premium_user.value
86
+ )
87
+
88
+ _secret_detection_object = _ENTERPRISE_SecretDetection()
89
+ imported_list.append(_secret_detection_object)
90
+ elif isinstance(callback, str) and callback == "openai_moderations":
91
+ from enterprise.enterprise_hooks.openai_moderation import (
92
+ _ENTERPRISE_OpenAI_Moderation,
93
+ )
94
+
95
+ if premium_user != True:
96
+ raise Exception(
97
+ "Trying to use OpenAI Moderations Check"
98
+ + CommonProxyErrors.not_premium_user.value
99
+ )
100
+
101
+ openai_moderations_object = _ENTERPRISE_OpenAI_Moderation()
102
+ imported_list.append(openai_moderations_object)
103
+ elif isinstance(callback, str) and callback == "lakera_prompt_injection":
104
+ from enterprise.enterprise_hooks.lakera_ai import (
105
+ _ENTERPRISE_lakeraAI_Moderation,
106
+ )
107
+
108
+ if premium_user != True:
109
+ raise Exception(
110
+ "Trying to use LakeraAI Prompt Injection"
111
+ + CommonProxyErrors.not_premium_user.value
112
+ )
113
+
114
+ init_params = {}
115
+ if "lakera_prompt_injection" in callback_specific_params:
116
+ init_params = callback_specific_params["lakera_prompt_injection"]
117
+ lakera_moderations_object = _ENTERPRISE_lakeraAI_Moderation(
118
+ **init_params
119
+ )
120
+ imported_list.append(lakera_moderations_object)
121
+ elif isinstance(callback, str) and callback == "aporio_prompt_injection":
122
+ from enterprise.enterprise_hooks.aporio_ai import _ENTERPRISE_Aporio
123
+
124
+ if premium_user is not True:
125
+ raise Exception(
126
+ "Trying to use Aporio AI Guardrail"
127
+ + CommonProxyErrors.not_premium_user.value
128
+ )
129
+
130
+ aporio_guardrail_object = _ENTERPRISE_Aporio()
131
+ imported_list.append(aporio_guardrail_object)
132
+ elif isinstance(callback, str) and callback == "google_text_moderation":
133
+ from enterprise.enterprise_hooks.google_text_moderation import (
134
+ _ENTERPRISE_GoogleTextModeration,
135
+ )
136
+
137
+ if premium_user != True:
138
+ raise Exception(
139
+ "Trying to use Google Text Moderation"
140
+ + CommonProxyErrors.not_premium_user.value
141
+ )
142
+
143
+ google_text_moderation_obj = _ENTERPRISE_GoogleTextModeration()
144
+ imported_list.append(google_text_moderation_obj)
145
+ elif isinstance(callback, str) and callback == "llmguard_moderations":
146
+ from enterprise.enterprise_hooks.llm_guard import _ENTERPRISE_LLMGuard
147
+
148
+ if premium_user != True:
149
+ raise Exception(
150
+ "Trying to use Llm Guard"
151
+ + CommonProxyErrors.not_premium_user.value
152
+ )
153
+
154
+ llm_guard_moderation_obj = _ENTERPRISE_LLMGuard()
155
+ imported_list.append(llm_guard_moderation_obj)
156
+ elif isinstance(callback, str) and callback == "blocked_user_check":
157
+ from enterprise.enterprise_hooks.blocked_user_list import (
158
+ _ENTERPRISE_BlockedUserList,
159
+ )
160
+
161
+ if premium_user != True:
162
+ raise Exception(
163
+ "Trying to use ENTERPRISE BlockedUser"
164
+ + CommonProxyErrors.not_premium_user.value
165
+ )
166
+
167
+ blocked_user_list = _ENTERPRISE_BlockedUserList(
168
+ prisma_client=prisma_client
169
+ )
170
+ imported_list.append(blocked_user_list)
171
+ elif isinstance(callback, str) and callback == "banned_keywords":
172
+ from enterprise.enterprise_hooks.banned_keywords import (
173
+ _ENTERPRISE_BannedKeywords,
174
+ )
175
+
176
+ if premium_user != True:
177
+ raise Exception(
178
+ "Trying to use ENTERPRISE BannedKeyword"
179
+ + CommonProxyErrors.not_premium_user.value
180
+ )
181
+
182
+ banned_keywords_obj = _ENTERPRISE_BannedKeywords()
183
+ imported_list.append(banned_keywords_obj)
184
+ elif isinstance(callback, str) and callback == "detect_prompt_injection":
185
+ from litellm.proxy.hooks.prompt_injection_detection import (
186
+ _OPTIONAL_PromptInjectionDetection,
187
+ )
188
+
189
+ prompt_injection_params = None
190
+ if "prompt_injection_params" in litellm_settings:
191
+ prompt_injection_params_in_config = litellm_settings[
192
+ "prompt_injection_params"
193
+ ]
194
+ prompt_injection_params = LiteLLMPromptInjectionParams(
195
+ **prompt_injection_params_in_config
196
+ )
197
+
198
+ prompt_injection_detection_obj = _OPTIONAL_PromptInjectionDetection(
199
+ prompt_injection_params=prompt_injection_params,
200
+ )
201
+ imported_list.append(prompt_injection_detection_obj)
202
+ elif isinstance(callback, str) and callback == "batch_redis_requests":
203
+ from litellm.proxy.hooks.batch_redis_get import (
204
+ _PROXY_BatchRedisRequests,
205
+ )
206
+
207
+ batch_redis_obj = _PROXY_BatchRedisRequests()
208
+ imported_list.append(batch_redis_obj)
209
+ elif isinstance(callback, str) and callback == "azure_content_safety":
210
+ from litellm.proxy.hooks.azure_content_safety import (
211
+ _PROXY_AzureContentSafety,
212
+ )
213
+
214
+ azure_content_safety_params = litellm_settings[
215
+ "azure_content_safety_params"
216
+ ]
217
+ for k, v in azure_content_safety_params.items():
218
+ if (
219
+ v is not None
220
+ and isinstance(v, str)
221
+ and v.startswith("os.environ/")
222
+ ):
223
+ azure_content_safety_params[k] = litellm.get_secret(v)
224
+
225
+ azure_content_safety_obj = _PROXY_AzureContentSafety(
226
+ **azure_content_safety_params,
227
+ )
228
+ imported_list.append(azure_content_safety_obj)
229
+ else:
230
+ verbose_proxy_logger.debug(
231
+ f"{blue_color_code} attempting to import custom calback={callback} {reset_color_code}"
232
+ )
233
+ imported_list.append(
234
+ get_instance_fn(
235
+ value=callback,
236
+ config_file_path=config_file_path,
237
+ )
238
+ )
239
+ if isinstance(litellm.callbacks, list):
240
+ litellm.callbacks.extend(imported_list)
241
+ else:
242
+ litellm.callbacks = imported_list # type: ignore
243
+ else:
244
+ litellm.callbacks = [
245
+ get_instance_fn(
246
+ value=value,
247
+ config_file_path=config_file_path,
248
+ )
249
+ ]
250
+ verbose_proxy_logger.debug(
251
+ f"{blue_color_code} Initialized Callbacks - {litellm.callbacks} {reset_color_code}"
252
+ )
253
+
254
+
255
+ def get_model_group_from_litellm_kwargs(kwargs: dict) -> Optional[str]:
256
+ _litellm_params = kwargs.get("litellm_params", None) or {}
257
+ _metadata = _litellm_params.get("metadata", None) or {}
258
+ _model_group = _metadata.get("model_group", None)
259
+ if _model_group is not None:
260
+ return _model_group
261
+
262
+ return None
263
+
264
+
265
+ def get_model_group_from_request_data(data: dict) -> Optional[str]:
266
+ _metadata = data.get("metadata", None) or {}
267
+ _model_group = _metadata.get("model_group", None)
268
+ if _model_group is not None:
269
+ return _model_group
270
+
271
+ return None
272
+
273
+
274
+ def get_remaining_tokens_and_requests_from_request_data(data: Dict) -> Dict[str, str]:
275
+ """
276
+ Helper function to return x-litellm-key-remaining-tokens-{model_group} and x-litellm-key-remaining-requests-{model_group}
277
+
278
+ Returns {} when api_key + model rpm/tpm limit is not set
279
+
280
+ """
281
+ headers = {}
282
+ _metadata = data.get("metadata", None) or {}
283
+ model_group = get_model_group_from_request_data(data)
284
+
285
+ # Remaining Requests
286
+ remaining_requests_variable_name = f"litellm-key-remaining-requests-{model_group}"
287
+ remaining_requests = _metadata.get(remaining_requests_variable_name, None)
288
+ if remaining_requests:
289
+ headers[f"x-litellm-key-remaining-requests-{model_group}"] = remaining_requests
290
+
291
+ # Remaining Tokens
292
+ remaining_tokens_variable_name = f"litellm-key-remaining-tokens-{model_group}"
293
+ remaining_tokens = _metadata.get(remaining_tokens_variable_name, None)
294
+ if remaining_tokens:
295
+ headers[f"x-litellm-key-remaining-tokens-{model_group}"] = remaining_tokens
296
+
297
+ return headers
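
A note on the helper above: it only surfaces values that the rate limiter has already written into request metadata, so it returns an empty dict unless a per-model tpm/rpm limit is configured for the key. A minimal standalone sketch of the same metadata-to-header mapping, in plain Python with no litellm imports (the key names mirror the ones in this diff):

from typing import Dict, Optional

def remaining_limit_headers(metadata: Dict, model_group: Optional[str]) -> Dict[str, str]:
    # maps litellm-key-remaining-{requests,tokens}-<model_group> metadata
    # entries onto x-litellm-key-remaining-* response headers
    headers: Dict[str, str] = {}
    if model_group is None:
        return headers
    for kind in ("requests", "tokens"):
        value = metadata.get(f"litellm-key-remaining-{kind}-{model_group}")
        if value is not None:
            headers[f"x-litellm-key-remaining-{kind}-{model_group}"] = str(value)
    return headers

metadata = {"model_group": "gpt-4", "litellm-key-remaining-tokens-gpt-4": 73}
print(remaining_limit_headers(metadata, metadata.get("model_group")))
# -> {'x-litellm-key-remaining-tokens-gpt-4': '73'}
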
.venv/lib/python3.12/site-packages/litellm/proxy/guardrails/init_guardrails.py CHANGED
@@ -5,7 +5,7 @@ from pydantic import BaseModel, RootModel
5
 
6
  import litellm
7
  from litellm._logging import verbose_proxy_logger
8
- from litellm.proxy.common_utils.init_callbacks import initialize_callbacks_on_proxy
9
  from litellm.types.guardrails import GuardrailItem, GuardrailItemSpec
10
 
11
  all_guardrails: List[GuardrailItem] = []
 
5
 
6
  import litellm
7
  from litellm._logging import verbose_proxy_logger
8
+ from litellm.proxy.common_utils.callback_utils import initialize_callbacks_on_proxy
9
  from litellm.types.guardrails import GuardrailItem, GuardrailItemSpec
10
 
11
  all_guardrails: List[GuardrailItem] = []
.venv/lib/python3.12/site-packages/litellm/proxy/hooks/parallel_request_limiter.py CHANGED
@@ -11,6 +11,10 @@ from litellm._logging import verbose_proxy_logger
11
  from litellm.caching import DualCache
12
  from litellm.integrations.custom_logger import CustomLogger
13
  from litellm.proxy._types import UserAPIKeyAuth
14
 
15
 
16
  class _PROXY_MaxParallelRequestsHandler(CustomLogger):
@@ -202,6 +206,85 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
202
  additional_details=f"Hit limit for api_key: {api_key}. tpm_limit: {tpm_limit}, current_tpm {current['current_tpm']} , rpm_limit: {rpm_limit} current rpm {current['current_rpm']} "
203
  )
204

 
205
  # check if REQUEST ALLOWED for user_id
206
  user_id = user_api_key_dict.user_id
207
  if user_id is not None:
@@ -299,6 +382,10 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
299
  return
300
 
301
  async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
302
  try:
303
  self.print_verbose("INSIDE parallel request limiter ASYNC SUCCESS LOGGING")
304
  global_max_parallel_requests = kwargs["litellm_params"]["metadata"].get(
@@ -365,6 +452,36 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
365
  request_count_api_key, new_val, ttl=60
366
  ) # store in cache for 1 min.
367
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
368
  # ------------
369
  # Update usage - User
370
  # ------------
 
11
  from litellm.caching import DualCache
12
  from litellm.integrations.custom_logger import CustomLogger
13
  from litellm.proxy._types import UserAPIKeyAuth
14
+ from litellm.proxy.auth.auth_utils import (
15
+ get_key_model_rpm_limit,
16
+ get_key_model_tpm_limit,
17
+ )
18
 
19
 
20
  class _PROXY_MaxParallelRequestsHandler(CustomLogger):
 
206
  additional_details=f"Hit limit for api_key: {api_key}. tpm_limit: {tpm_limit}, current_tpm {current['current_tpm']} , rpm_limit: {rpm_limit} current rpm {current['current_rpm']} "
207
  )
208
 
209
+ # Check if request under RPM/TPM per model for a given API Key
210
+ if (
211
+ get_key_model_tpm_limit(user_api_key_dict) is not None
212
+ or get_key_model_rpm_limit(user_api_key_dict) is not None
213
+ ):
214
+ _model = data.get("model", None)
215
+ request_count_api_key = (
216
+ f"{api_key}::{_model}::{precise_minute}::request_count"
217
+ )
218
+
219
+ current = await self.internal_usage_cache.async_get_cache(
220
+ key=request_count_api_key
221
+ ) # {"current_requests": 1, "current_tpm": 1, "current_rpm": 10}
222
+
223
+ tpm_limit_for_model = None
224
+ rpm_limit_for_model = None
225
+
226
+ _tpm_limit_for_key_model = get_key_model_tpm_limit(user_api_key_dict)
227
+ _rpm_limit_for_key_model = get_key_model_rpm_limit(user_api_key_dict)
228
+
229
+ if _model is not None:
230
+
231
+ if _tpm_limit_for_key_model:
232
+ tpm_limit_for_model = _tpm_limit_for_key_model.get(_model)
233
+
234
+ if _rpm_limit_for_key_model:
235
+ rpm_limit_for_model = _rpm_limit_for_key_model.get(_model)
236
+ if current is None:
237
+ new_val = {
238
+ "current_requests": 1,
239
+ "current_tpm": 0,
240
+ "current_rpm": 0,
241
+ }
242
+ await self.internal_usage_cache.async_set_cache(
243
+ request_count_api_key, new_val
244
+ )
245
+ elif tpm_limit_for_model is not None or rpm_limit_for_model is not None:
246
+ # Increase count for this token
247
+ new_val = {
248
+ "current_requests": current["current_requests"] + 1,
249
+ "current_tpm": current["current_tpm"],
250
+ "current_rpm": current["current_rpm"],
251
+ }
252
+ if (
253
+ tpm_limit_for_model is not None
254
+ and current["current_tpm"] >= tpm_limit_for_model
255
+ ):
256
+ return self.raise_rate_limit_error(
257
+ additional_details=f"Hit TPM limit for model: {_model} on api_key: {api_key}. tpm_limit: {tpm_limit_for_model}, current_tpm {current['current_tpm']} "
258
+ )
259
+ elif (
260
+ rpm_limit_for_model is not None
261
+ and current["current_rpm"] >= rpm_limit_for_model
262
+ ):
263
+ return self.raise_rate_limit_error(
264
+ additional_details=f"Hit RPM limit for model: {_model} on api_key: {api_key}. rpm_limit: {rpm_limit_for_model}, current_rpm {current['current_rpm']} "
265
+ )
266
+ else:
267
+ await self.internal_usage_cache.async_set_cache(
268
+ request_count_api_key, new_val
269
+ )
270
+
271
+ _remaining_tokens = None
272
+ _remaining_requests = None
273
+ # Add remaining tokens, requests to metadata
274
+ if tpm_limit_for_model is not None:
275
+ _remaining_tokens = tpm_limit_for_model - new_val["current_tpm"]
276
+ if rpm_limit_for_model is not None:
277
+ _remaining_requests = rpm_limit_for_model - new_val["current_rpm"]
278
+
279
+ _remaining_limits_data = {
280
+ f"litellm-key-remaining-tokens-{_model}": _remaining_tokens,
281
+ f"litellm-key-remaining-requests-{_model}": _remaining_requests,
282
+ }
283
+
284
+ if "metadata" not in data:
285
+ data["metadata"] = {}
286
+ data["metadata"].update(_remaining_limits_data)
287
+
288
  # check if REQUEST ALLOWED for user_id
289
  user_id = user_api_key_dict.user_id
290
  if user_id is not None:
 
382
  return
383
 
384
  async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
385
+ from litellm.proxy.common_utils.callback_utils import (
386
+ get_model_group_from_litellm_kwargs,
387
+ )
388
+
389
  try:
390
  self.print_verbose("INSIDE parallel request limiter ASYNC SUCCESS LOGGING")
391
  global_max_parallel_requests = kwargs["litellm_params"]["metadata"].get(
 
452
  request_count_api_key, new_val, ttl=60
453
  ) # store in cache for 1 min.
454
 
455
+ # ------------
456
+ # Update usage - model group + API Key
457
+ # ------------
458
+ model_group = get_model_group_from_litellm_kwargs(kwargs)
459
+ if user_api_key is not None and model_group is not None:
460
+ request_count_api_key = (
461
+ f"{user_api_key}::{model_group}::{precise_minute}::request_count"
462
+ )
463
+
464
+ current = await self.internal_usage_cache.async_get_cache(
465
+ key=request_count_api_key
466
+ ) or {
467
+ "current_requests": 1,
468
+ "current_tpm": total_tokens,
469
+ "current_rpm": 1,
470
+ }
471
+
472
+ new_val = {
473
+ "current_requests": max(current["current_requests"] - 1, 0),
474
+ "current_tpm": current["current_tpm"] + total_tokens,
475
+ "current_rpm": current["current_rpm"] + 1,
476
+ }
477
+
478
+ self.print_verbose(
479
+ f"updated_value in success call: {new_val}, precise_minute: {precise_minute}"
480
+ )
481
+ await self.internal_usage_cache.async_set_cache(
482
+ request_count_api_key, new_val, ttl=60
483
+ )
484
+
485
  # ------------
486
  # Update usage - User
487
  # ------------
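
The per-model counters added above hinge on a cache key of the form {api_key}::{model}::{precise_minute}::request_count, checked before the call and incremented on success. A self-contained sketch of that bookkeeping, assuming a plain dict in place of the proxy's internal usage cache and an approximate minute window:

from datetime import datetime, timezone

usage_cache: dict = {}  # stand-in for the proxy's internal usage cache

def usage_key(api_key: str, model: str) -> str:
    # rough approximation of the limiter's per-minute window
    minute = datetime.now(timezone.utc).strftime("%Y-%m-%d-%H-%M")
    return f"{api_key}::{model}::{minute}::request_count"

def check_model_limits(api_key: str, model: str, tpm_limit: int, rpm_limit: int) -> None:
    current = usage_cache.get(
        usage_key(api_key, model),
        {"current_requests": 0, "current_tpm": 0, "current_rpm": 0},
    )
    if current["current_tpm"] >= tpm_limit:
        raise RuntimeError(f"hit TPM limit for {model}")
    if current["current_rpm"] >= rpm_limit:
        raise RuntimeError(f"hit RPM limit for {model}")

def record_success(api_key: str, model: str, total_tokens: int) -> None:
    key = usage_key(api_key, model)
    current = usage_cache.get(key, {"current_requests": 1, "current_tpm": 0, "current_rpm": 0})
    usage_cache[key] = {
        "current_requests": max(current["current_requests"] - 1, 0),
        "current_tpm": current["current_tpm"] + total_tokens,
        "current_rpm": current["current_rpm"] + 1,
    }

check_model_limits("sk-123", "gpt-4", tpm_limit=100, rpm_limit=2)
record_success("sk-123", "gpt-4", total_tokens=42)
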
.venv/lib/python3.12/site-packages/litellm/proxy/management_endpoints/key_management_endpoints.py CHANGED
@@ -68,7 +68,8 @@ async def generate_key_fn(
68
  - metadata: Optional[dict] - Metadata for key, store information for key. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" }
69
  - permissions: Optional[dict] - key-specific permissions. Currently just used for turning off pii masking (if connected). Example - {"pii": false}
70
  - model_max_budget: Optional[dict] - key-specific model budget in USD. Example - {"text-davinci-002": 0.5, "gpt-3.5-turbo": 0.5}. IF null or {} then no model specific budget.
71
-
 
72
  Examples:
73
 
74
  1. Allow users to turn on/off pii masking
@@ -323,6 +324,9 @@ async def update_key_fn(
323
  # get non default values for key
324
  non_default_values = {}
325
  for k, v in data_json.items():
326
  if v is not None and v not in (
327
  [],
328
  {},
@@ -343,6 +347,25 @@ async def update_key_fn(
343
  key_reset_at = datetime.now(timezone.utc) + timedelta(seconds=duration_s)
344
  non_default_values["budget_reset_at"] = key_reset_at
345

 
346
  response = await prisma_client.update_data(
347
  token=key, data={**non_default_values, "token": key}
348
  )
@@ -709,6 +732,8 @@ async def generate_key_helper_fn(
709
  allowed_cache_controls: Optional[list] = [],
710
  permissions: Optional[dict] = {},
711
  model_max_budget: Optional[dict] = {},
 
 
712
  teams: Optional[list] = None,
713
  organization_id: Optional[str] = None,
714
  table_name: Optional[Literal["key", "user"]] = None,
@@ -750,6 +775,15 @@ async def generate_key_helper_fn(
750
  aliases_json = json.dumps(aliases)
751
  config_json = json.dumps(config)
752
  permissions_json = json.dumps(permissions)
 
753
  metadata_json = json.dumps(metadata)
754
  model_max_budget_json = json.dumps(model_max_budget)
755
  user_role = user_role
 
68
  - metadata: Optional[dict] - Metadata for key, store information for key. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" }
69
  - permissions: Optional[dict] - key-specific permissions. Currently just used for turning off pii masking (if connected). Example - {"pii": false}
70
  - model_max_budget: Optional[dict] - key-specific model budget in USD. Example - {"text-davinci-002": 0.5, "gpt-3.5-turbo": 0.5}. IF null or {} then no model specific budget.
71
+ - model_rpm_limit: Optional[dict] - key-specific model rpm limit. Example - {"text-davinci-002": 1000, "gpt-3.5-turbo": 1000}. IF null or {} then no model specific rpm limit.
72
+ - model_tpm_limit: Optional[dict] - key-specific model tpm limit. Example - {"text-davinci-002": 1000, "gpt-3.5-turbo": 1000}. IF null or {} then no model specific tpm limit.
73
  Examples:
74
 
75
  1. Allow users to turn on/off pii masking
 
324
  # get non default values for key
325
  non_default_values = {}
326
  for k, v in data_json.items():
327
+ # this field gets stored in metadata
328
+ if key == "model_rpm_limit" or key == "model_tpm_limit":
329
+ continue
330
  if v is not None and v not in (
331
  [],
332
  {},
 
347
  key_reset_at = datetime.now(timezone.utc) + timedelta(seconds=duration_s)
348
  non_default_values["budget_reset_at"] = key_reset_at
349
 
350
+ # Update metadata for virtual Key
351
+ if data.model_tpm_limit:
352
+ _metadata = existing_key_row.metadata or {}
353
+ if "model_tpm_limit" not in _metadata:
354
+ _metadata["model_tpm_limit"] = {}
355
+
356
+ _metadata["model_tpm_limit"].update(data.model_tpm_limit)
357
+ non_default_values["metadata"] = _metadata
358
+ non_default_values.pop("model_tpm_limit", None)
359
+
360
+ if data.model_rpm_limit:
361
+ _metadata = existing_key_row.metadata or {}
362
+ if "model_rpm_limit" not in _metadata:
363
+ _metadata["model_rpm_limit"] = {}
364
+
365
+ _metadata["model_rpm_limit"].update(data.model_rpm_limit)
366
+ non_default_values["metadata"] = _metadata
367
+ non_default_values.pop("model_rpm_limit", None)
368
+
369
  response = await prisma_client.update_data(
370
  token=key, data={**non_default_values, "token": key}
371
  )
 
732
  allowed_cache_controls: Optional[list] = [],
733
  permissions: Optional[dict] = {},
734
  model_max_budget: Optional[dict] = {},
735
+ model_rpm_limit: Optional[dict] = {},
736
+ model_tpm_limit: Optional[dict] = {},
737
  teams: Optional[list] = None,
738
  organization_id: Optional[str] = None,
739
  table_name: Optional[Literal["key", "user"]] = None,
 
775
  aliases_json = json.dumps(aliases)
776
  config_json = json.dumps(config)
777
  permissions_json = json.dumps(permissions)
778
+
779
+ # Add model_rpm_limit and model_tpm_limit to metadata
780
+ if model_rpm_limit is not None:
781
+ metadata = metadata or {}
782
+ metadata["model_rpm_limit"] = model_rpm_limit
783
+ if model_tpm_limit is not None:
784
+ metadata = metadata or {}
785
+ metadata["model_tpm_limit"] = model_tpm_limit
786
+
787
  metadata_json = json.dumps(metadata)
788
  model_max_budget_json = json.dumps(model_max_budget)
789
  user_role = user_role
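
With the two new fields threaded into generate_key_helper_fn, per-model limits end up stored on the virtual key's metadata under "model_rpm_limit" / "model_tpm_limit". A hedged example of creating such a key against a running proxy; the /key/generate route, base URL, and master key are assumptions about the deployment:

import httpx

PROXY_BASE = "http://localhost:4000"   # assumed local proxy
MASTER_KEY = "sk-1234"                 # assumed master key

payload = {
    "metadata": {"team": "litellm-team3"},
    # per the updated docstring above; persisted into metadata on the key
    "model_tpm_limit": {"gpt-4": 100},
    "model_rpm_limit": {"gpt-4": 2},
}

resp = httpx.post(
    f"{PROXY_BASE}/key/generate",
    headers={"Authorization": f"Bearer {MASTER_KEY}"},
    json=payload,
)
resp.raise_for_status()
print(resp.json().get("key"))  # newly generated virtual key
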
.venv/lib/python3.12/site-packages/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py CHANGED
@@ -3,7 +3,7 @@ import asyncio
3
  import json
4
  import traceback
5
  from base64 import b64encode
6
- from typing import List, Optional
7
 
8
  import httpx
9
  from fastapi import (
@@ -267,12 +267,25 @@ def forward_headers_from_request(
267
  return headers
268
 
269

 
270
  async def pass_through_request(
271
  request: Request,
272
  target: str,
273
  custom_headers: dict,
274
  user_api_key_dict: UserAPIKeyAuth,
275
  forward_headers: Optional[bool] = False,
 
 
276
  ):
277
  try:
278
  import time
@@ -291,7 +304,7 @@ async def pass_through_request(
291
  body_str = request_body.decode()
292
  try:
293
  _parsed_body = ast.literal_eval(body_str)
294
- except:
295
  _parsed_body = json.loads(body_str)
296
 
297
  verbose_proxy_logger.debug(
@@ -307,25 +320,10 @@ async def pass_through_request(
307
  call_type="pass_through_endpoint",
308
  )
309
 
310
- async_client = httpx.AsyncClient()
311
-
312
- response = await async_client.request(
313
- method=request.method,
314
- url=url,
315
- headers=headers,
316
- params=request.query_params,
317
- json=_parsed_body,
318
- )
319
-
320
- if response.status_code >= 300:
321
- raise HTTPException(status_code=response.status_code, detail=response.text)
322
-
323
- content = await response.aread()
324
 
325
- ## LOG SUCCESS
326
- start_time = time.time()
327
- end_time = time.time()
328
  # create logging object
 
329
  logging_obj = Logging(
330
  model="unknown",
331
  messages=[{"role": "user", "content": "no-message-pass-through-endpoint"}],
@@ -335,6 +333,7 @@ async def pass_through_request(
335
  litellm_call_id=str(uuid.uuid4()),
336
  function_id="1245",
337
  )
 
338
  # done for supporting 'parallel_request_limiter.py' with pass-through endpoints
339
  kwargs = {
340
  "litellm_params": {
@@ -355,6 +354,103 @@ async def pass_through_request(
355
  call_type="pass_through_endpoint",
356
  )
357
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
358
  await logging_obj.async_success_handler(
359
  result="",
360
  start_time=start_time,
@@ -365,7 +461,7 @@ async def pass_through_request(
365
  return Response(
366
  content=content,
367
  status_code=response.status_code,
368
- headers=dict(response.headers),
369
  )
370
  except Exception as e:
371
  verbose_proxy_logger.exception(
@@ -423,19 +519,25 @@ def create_pass_through_route(
423
  )
424
 
425
  except Exception:
426
- verbose_proxy_logger.warning("Defaulting to target being a url.")
427
 
428
- async def endpoint_func(
429
  request: Request,
430
  fastapi_response: Response,
431
  user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
 
 
 
 
432
  ):
433
- return await pass_through_request(
434
  request=request,
435
  target=target,
436
  custom_headers=custom_headers or {},
437
  user_api_key_dict=user_api_key_dict,
438
  forward_headers=_forward_headers,
 
 
439
  )
440
 
441
  return endpoint_func
 
3
  import json
4
  import traceback
5
  from base64 import b64encode
6
+ from typing import AsyncIterable, List, Optional
7
 
8
  import httpx
9
  from fastapi import (
 
267
  return headers
268
 
269
 
270
+ def get_response_headers(headers: httpx.Headers) -> dict:
271
+ excluded_headers = {"transfer-encoding", "content-encoding"}
272
+ return_headers = {
273
+ key: value
274
+ for key, value in headers.items()
275
+ if key.lower() not in excluded_headers
276
+ }
277
+
278
+ return return_headers
279
+
280
+
281
  async def pass_through_request(
282
  request: Request,
283
  target: str,
284
  custom_headers: dict,
285
  user_api_key_dict: UserAPIKeyAuth,
286
  forward_headers: Optional[bool] = False,
287
+ query_params: Optional[dict] = None,
288
+ stream: Optional[bool] = None,
289
  ):
290
  try:
291
  import time
 
304
  body_str = request_body.decode()
305
  try:
306
  _parsed_body = ast.literal_eval(body_str)
307
+ except Exception:
308
  _parsed_body = json.loads(body_str)
309
 
310
  verbose_proxy_logger.debug(
 
320
  call_type="pass_through_endpoint",
321
  )
322
 
323
+ async_client = httpx.AsyncClient(timeout=600)
324

 
325
  # create logging object
326
+ start_time = time.time()
327
  logging_obj = Logging(
328
  model="unknown",
329
  messages=[{"role": "user", "content": "no-message-pass-through-endpoint"}],
 
333
  litellm_call_id=str(uuid.uuid4()),
334
  function_id="1245",
335
  )
336
+
337
  # done for supporting 'parallel_request_limiter.py' with pass-through endpoints
338
  kwargs = {
339
  "litellm_params": {
 
354
  call_type="pass_through_endpoint",
355
  )
356
 
357
+ # combine url with query params for logging
358
+
359
+ requested_query_params = query_params or request.query_params.__dict__
360
+ requested_query_params_str = "&".join(
361
+ f"{k}={v}" for k, v in requested_query_params.items()
362
+ )
363
+
364
+ if "?" in str(url):
365
+ logging_url = str(url) + "&" + requested_query_params_str
366
+ else:
367
+ logging_url = str(url) + "?" + requested_query_params_str
368
+
369
+ logging_obj.pre_call(
370
+ input=[{"role": "user", "content": "no-message-pass-through-endpoint"}],
371
+ api_key="",
372
+ additional_args={
373
+ "complete_input_dict": _parsed_body,
374
+ "api_base": logging_url,
375
+ "headers": headers,
376
+ },
377
+ )
378
+
379
+ if stream:
380
+ req = async_client.build_request(
381
+ "POST",
382
+ url,
383
+ json=_parsed_body,
384
+ params=requested_query_params,
385
+ headers=headers,
386
+ )
387
+
388
+ response = await async_client.send(req, stream=stream)
389
+
390
+ try:
391
+ response.raise_for_status()
392
+ except httpx.HTTPStatusError as e:
393
+ raise HTTPException(
394
+ status_code=e.response.status_code, detail=await e.response.aread()
395
+ )
396
+
397
+ # Create an async generator to yield the response content
398
+ async def stream_response() -> AsyncIterable[bytes]:
399
+ async for chunk in response.aiter_bytes():
400
+ yield chunk
401
+
402
+ return StreamingResponse(
403
+ stream_response(),
404
+ headers=get_response_headers(response.headers),
405
+ status_code=response.status_code,
406
+ )
407
+
408
+ response = await async_client.request(
409
+ method=request.method,
410
+ url=url,
411
+ headers=headers,
412
+ params=requested_query_params,
413
+ json=_parsed_body,
414
+ )
415
+
416
+ if (
417
+ response.headers.get("content-type") is not None
418
+ and response.headers["content-type"] == "text/event-stream"
419
+ ):
420
+ try:
421
+ response.raise_for_status()
422
+ except httpx.HTTPStatusError as e:
423
+ raise HTTPException(
424
+ status_code=e.response.status_code, detail=await e.response.aread()
425
+ )
426
+
427
+ # streaming response
428
+ # Create an async generator to yield the response content
429
+ async def stream_response() -> AsyncIterable[bytes]:
430
+ async for chunk in response.aiter_bytes():
431
+ yield chunk
432
+
433
+ return StreamingResponse(
434
+ stream_response(),
435
+ headers=get_response_headers(response.headers),
436
+ status_code=response.status_code,
437
+ )
438
+
439
+ try:
440
+ response.raise_for_status()
441
+ except httpx.HTTPStatusError as e:
442
+ raise HTTPException(
443
+ status_code=e.response.status_code, detail=e.response.text
444
+ )
445
+
446
+ if response.status_code >= 300:
447
+ raise HTTPException(status_code=response.status_code, detail=response.text)
448
+
449
+ content = await response.aread()
450
+
451
+ ## LOG SUCCESS
452
+ end_time = time.time()
453
+
454
  await logging_obj.async_success_handler(
455
  result="",
456
  start_time=start_time,
 
461
  return Response(
462
  content=content,
463
  status_code=response.status_code,
464
+ headers=get_response_headers(response.headers),
465
  )
466
  except Exception as e:
467
  verbose_proxy_logger.exception(
 
519
  )
520
 
521
  except Exception:
522
+ verbose_proxy_logger.debug("Defaulting to target being a url.")
523
 
524
+ async def endpoint_func( # type: ignore
525
  request: Request,
526
  fastapi_response: Response,
527
  user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
528
+ query_params: Optional[dict] = None,
529
+ stream: Optional[
530
+ bool
531
+ ] = None, # if pass-through endpoint is a streaming request
532
  ):
533
+ return await pass_through_request( # type: ignore
534
  request=request,
535
  target=target,
536
  custom_headers=custom_headers or {},
537
  user_api_key_dict=user_api_key_dict,
538
  forward_headers=_forward_headers,
539
+ query_params=query_params,
540
+ stream=stream,
541
  )
542
 
543
  return endpoint_func
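
get_response_headers above strips transfer-encoding and content-encoding before copying upstream headers onto the proxy's response, since the body is re-framed (or re-streamed) by FastAPI. A standalone sketch of the same filter, with a plain dict standing in for httpx.Headers:

def filter_response_headers(upstream_headers: dict) -> dict:
    # framing headers from the upstream response must not be forwarded verbatim
    excluded = {"transfer-encoding", "content-encoding"}
    return {k: v for k, v in upstream_headers.items() if k.lower() not in excluded}

print(filter_response_headers({
    "Content-Type": "application/json",
    "Transfer-Encoding": "chunked",   # dropped
    "x-request-id": "abc-123",
}))
# -> {'Content-Type': 'application/json', 'x-request-id': 'abc-123'}
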
.venv/lib/python3.12/site-packages/litellm/proxy/proxy_config.yaml CHANGED
@@ -4,10 +4,14 @@ model_list:
4
  model: openai/fake
5
  api_key: fake-key
6
  api_base: https://exampleopenaiendpoint-production.up.railway.app/
 
 
7
  - model_name: fireworks-llama-v3-70b-instruct
8
  litellm_params:
9
  model: fireworks_ai/accounts/fireworks/models/llama-v3-70b-instruct
10
  api_key: "os.environ/FIREWORKS"
 
 
11
  - model_name: "*"
12
  litellm_params:
13
  model: "*"
@@ -42,7 +46,5 @@ general_settings:
42
 
43
  litellm_settings:
44
  fallbacks: [{"gemini-1.5-pro-001": ["gpt-4o"]}]
45
- callbacks: ["gcs_bucket"]
46
- success_callback: ["langfuse"]
47
  langfuse_default_tags: ["cache_hit", "cache_key", "user_api_key_alias", "user_api_key_team_alias"]
48
- cache: True
 
4
  model: openai/fake
5
  api_key: fake-key
6
  api_base: https://exampleopenaiendpoint-production.up.railway.app/
7
+ model_info:
8
+ access_groups: ["beta-models"]
9
  - model_name: fireworks-llama-v3-70b-instruct
10
  litellm_params:
11
  model: fireworks_ai/accounts/fireworks/models/llama-v3-70b-instruct
12
  api_key: "os.environ/FIREWORKS"
13
+ model_info:
14
+ access_groups: ["beta-models"]
15
  - model_name: "*"
16
  litellm_params:
17
  model: "*"
 
46
 
47
  litellm_settings:
48
  fallbacks: [{"gemini-1.5-pro-001": ["gpt-4o"]}]
49
+ success_callback: ["langfuse", "prometheus"]
 
50
  langfuse_default_tags: ["cache_hit", "cache_key", "user_api_key_alias", "user_api_key_team_alias"]
 
.venv/lib/python3.12/site-packages/litellm/proxy/proxy_server.py CHANGED
@@ -148,6 +148,10 @@ from litellm.proxy.common_utils.admin_ui_utils import (
148
  html_form,
149
  show_missing_vars_in_env,
150
  )
 
 
 
 
151
  from litellm.proxy.common_utils.debug_utils import init_verbose_loggers
152
  from litellm.proxy.common_utils.debug_utils import router as debugging_endpoints_router
153
  from litellm.proxy.common_utils.encrypt_decrypt_utils import (
@@ -158,7 +162,6 @@ from litellm.proxy.common_utils.http_parsing_utils import (
158
  _read_request_body,
159
  check_file_size_under_limit,
160
  )
161
- from litellm.proxy.common_utils.init_callbacks import initialize_callbacks_on_proxy
162
  from litellm.proxy.common_utils.load_config_utils import get_file_contents_from_s3
163
  from litellm.proxy.common_utils.openai_endpoint_utils import (
164
  remove_sensitive_info_from_deployment,
@@ -227,6 +230,9 @@ from litellm.proxy.utils import (
227
  send_email,
228
  update_spend,
229
  )
 
 
 
230
  from litellm.proxy.vertex_ai_endpoints.vertex_endpoints import router as vertex_router
231
  from litellm.proxy.vertex_ai_endpoints.vertex_endpoints import set_default_vertex_config
232
  from litellm.router import (
@@ -503,6 +509,7 @@ def get_custom_headers(
503
  model_region: Optional[str] = None,
504
  response_cost: Optional[Union[float, str]] = None,
505
  fastest_response_batch_completion: Optional[bool] = None,
 
506
  **kwargs,
507
  ) -> dict:
508
  exclude_values = {"", None}
@@ -523,6 +530,12 @@ def get_custom_headers(
523
  ),
524
  **{k: str(v) for k, v in kwargs.items()},
525
  }
 
 
 
 
 
 
526
  try:
527
  return {
528
  key: value for key, value in headers.items() if value not in exclude_values
@@ -3107,6 +3120,7 @@ async def chat_completion(
3107
  response_cost=response_cost,
3108
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
3109
  fastest_response_batch_completion=fastest_response_batch_completion,
 
3110
  **additional_headers,
3111
  )
3112
  selected_data_generator = select_data_generator(
@@ -3141,6 +3155,7 @@ async def chat_completion(
3141
  response_cost=response_cost,
3142
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
3143
  fastest_response_batch_completion=fastest_response_batch_completion,
 
3144
  **additional_headers,
3145
  )
3146
  )
@@ -3322,6 +3337,7 @@ async def completion(
3322
  api_base=api_base,
3323
  version=version,
3324
  response_cost=response_cost,
 
3325
  )
3326
  selected_data_generator = select_data_generator(
3327
  response=response,
@@ -3343,6 +3359,7 @@ async def completion(
3343
  api_base=api_base,
3344
  version=version,
3345
  response_cost=response_cost,
 
3346
  )
3347
  )
3348
  await check_response_size_is_safe(response=response)
@@ -3550,6 +3567,7 @@ async def embeddings(
3550
  response_cost=response_cost,
3551
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
3552
  call_id=litellm_call_id,
 
3553
  )
3554
  )
3555
  await check_response_size_is_safe(response=response)
@@ -3676,6 +3694,7 @@ async def image_generation(
3676
  response_cost=response_cost,
3677
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
3678
  call_id=litellm_call_id,
 
3679
  )
3680
  )
3681
 
@@ -3797,6 +3816,7 @@ async def audio_speech(
3797
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
3798
  fastest_response_batch_completion=None,
3799
  call_id=litellm_call_id,
 
3800
  )
3801
 
3802
  selected_data_generator = select_data_generator(
@@ -3934,6 +3954,7 @@ async def audio_transcriptions(
3934
  response_cost=response_cost,
3935
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
3936
  call_id=litellm_call_id,
 
3937
  )
3938
  )
3939
 
@@ -4037,6 +4058,7 @@ async def get_assistants(
4037
  api_base=api_base,
4038
  version=version,
4039
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
 
4040
  )
4041
  )
4042
 
@@ -4132,6 +4154,7 @@ async def create_assistant(
4132
  api_base=api_base,
4133
  version=version,
4134
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
 
4135
  )
4136
  )
4137
 
@@ -4227,6 +4250,7 @@ async def delete_assistant(
4227
  api_base=api_base,
4228
  version=version,
4229
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
 
4230
  )
4231
  )
4232
 
@@ -4322,6 +4346,7 @@ async def create_threads(
4322
  api_base=api_base,
4323
  version=version,
4324
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
 
4325
  )
4326
  )
4327
 
@@ -4416,6 +4441,7 @@ async def get_thread(
4416
  api_base=api_base,
4417
  version=version,
4418
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
 
4419
  )
4420
  )
4421
 
@@ -4513,6 +4539,7 @@ async def add_messages(
4513
  api_base=api_base,
4514
  version=version,
4515
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
 
4516
  )
4517
  )
4518
 
@@ -4606,6 +4633,7 @@ async def get_messages(
4606
  api_base=api_base,
4607
  version=version,
4608
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
 
4609
  )
4610
  )
4611
 
@@ -4713,6 +4741,7 @@ async def run_thread(
4713
  api_base=api_base,
4714
  version=version,
4715
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
 
4716
  )
4717
  )
4718
 
@@ -4835,6 +4864,7 @@ async def create_batch(
4835
  api_base=api_base,
4836
  version=version,
4837
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
 
4838
  )
4839
  )
4840
 
@@ -4930,6 +4960,7 @@ async def retrieve_batch(
4930
  api_base=api_base,
4931
  version=version,
4932
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
 
4933
  )
4934
  )
4935
 
@@ -5148,6 +5179,7 @@ async def moderations(
5148
  api_base=api_base,
5149
  version=version,
5150
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
 
5151
  )
5152
  )
5153
 
@@ -5317,6 +5349,7 @@ async def anthropic_response(
5317
  api_base=api_base,
5318
  version=version,
5319
  response_cost=response_cost,
 
5320
  )
5321
  )
5322
 
@@ -9704,6 +9737,7 @@ def cleanup_router_config_variables():
9704
  app.include_router(router)
9705
  app.include_router(fine_tuning_router)
9706
  app.include_router(vertex_router)
 
9707
  app.include_router(pass_through_router)
9708
  app.include_router(health_router)
9709
  app.include_router(key_management_router)
 
148
  html_form,
149
  show_missing_vars_in_env,
150
  )
151
+ from litellm.proxy.common_utils.callback_utils import (
152
+ get_remaining_tokens_and_requests_from_request_data,
153
+ initialize_callbacks_on_proxy,
154
+ )
155
  from litellm.proxy.common_utils.debug_utils import init_verbose_loggers
156
  from litellm.proxy.common_utils.debug_utils import router as debugging_endpoints_router
157
  from litellm.proxy.common_utils.encrypt_decrypt_utils import (
 
162
  _read_request_body,
163
  check_file_size_under_limit,
164
  )
 
165
  from litellm.proxy.common_utils.load_config_utils import get_file_contents_from_s3
166
  from litellm.proxy.common_utils.openai_endpoint_utils import (
167
  remove_sensitive_info_from_deployment,
 
230
  send_email,
231
  update_spend,
232
  )
233
+ from litellm.proxy.vertex_ai_endpoints.google_ai_studio_endpoints import (
234
+ router as gemini_router,
235
+ )
236
  from litellm.proxy.vertex_ai_endpoints.vertex_endpoints import router as vertex_router
237
  from litellm.proxy.vertex_ai_endpoints.vertex_endpoints import set_default_vertex_config
238
  from litellm.router import (
 
509
  model_region: Optional[str] = None,
510
  response_cost: Optional[Union[float, str]] = None,
511
  fastest_response_batch_completion: Optional[bool] = None,
512
+ request_data: Optional[dict] = {},
513
  **kwargs,
514
  ) -> dict:
515
  exclude_values = {"", None}
 
530
  ),
531
  **{k: str(v) for k, v in kwargs.items()},
532
  }
533
+ if request_data:
534
+ remaining_tokens_header = get_remaining_tokens_and_requests_from_request_data(
535
+ request_data
536
+ )
537
+ headers.update(remaining_tokens_header)
538
+
539
  try:
540
  return {
541
  key: value for key, value in headers.items() if value not in exclude_values
 
3120
  response_cost=response_cost,
3121
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
3122
  fastest_response_batch_completion=fastest_response_batch_completion,
3123
+ request_data=data,
3124
  **additional_headers,
3125
  )
3126
  selected_data_generator = select_data_generator(
 
3155
  response_cost=response_cost,
3156
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
3157
  fastest_response_batch_completion=fastest_response_batch_completion,
3158
+ request_data=data,
3159
  **additional_headers,
3160
  )
3161
  )
 
3337
  api_base=api_base,
3338
  version=version,
3339
  response_cost=response_cost,
3340
+ request_data=data,
3341
  )
3342
  selected_data_generator = select_data_generator(
3343
  response=response,
 
3359
  api_base=api_base,
3360
  version=version,
3361
  response_cost=response_cost,
3362
+ request_data=data,
3363
  )
3364
  )
3365
  await check_response_size_is_safe(response=response)
 
3567
  response_cost=response_cost,
3568
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
3569
  call_id=litellm_call_id,
3570
+ request_data=data,
3571
  )
3572
  )
3573
  await check_response_size_is_safe(response=response)
 
3694
  response_cost=response_cost,
3695
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
3696
  call_id=litellm_call_id,
3697
+ request_data=data,
3698
  )
3699
  )
3700
 
 
3816
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
3817
  fastest_response_batch_completion=None,
3818
  call_id=litellm_call_id,
3819
+ request_data=data,
3820
  )
3821
 
3822
  selected_data_generator = select_data_generator(
 
3954
  response_cost=response_cost,
3955
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
3956
  call_id=litellm_call_id,
3957
+ request_data=data,
3958
  )
3959
  )
3960
 
 
4058
  api_base=api_base,
4059
  version=version,
4060
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
4061
+ request_data=data,
4062
  )
4063
  )
4064
 
 
4154
  api_base=api_base,
4155
  version=version,
4156
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
4157
+ request_data=data,
4158
  )
4159
  )
4160
 
 
4250
  api_base=api_base,
4251
  version=version,
4252
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
4253
+ request_data=data,
4254
  )
4255
  )
4256
 
 
4346
  api_base=api_base,
4347
  version=version,
4348
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
4349
+ request_data=data,
4350
  )
4351
  )
4352
 
 
4441
  api_base=api_base,
4442
  version=version,
4443
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
4444
+ request_data=data,
4445
  )
4446
  )
4447
 
 
4539
  api_base=api_base,
4540
  version=version,
4541
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
4542
+ request_data=data,
4543
  )
4544
  )
4545
 
 
4633
  api_base=api_base,
4634
  version=version,
4635
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
4636
+ request_data=data,
4637
  )
4638
  )
4639
 
 
4741
  api_base=api_base,
4742
  version=version,
4743
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
4744
+ request_data=data,
4745
  )
4746
  )
4747
 
 
4864
  api_base=api_base,
4865
  version=version,
4866
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
4867
+ request_data=data,
4868
  )
4869
  )
4870
 
 
4960
  api_base=api_base,
4961
  version=version,
4962
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
4963
+ request_data=data,
4964
  )
4965
  )
4966
 
 
5179
  api_base=api_base,
5180
  version=version,
5181
  model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
5182
+ request_data=data,
5183
  )
5184
  )
5185
 
 
5349
  api_base=api_base,
5350
  version=version,
5351
  response_cost=response_cost,
5352
+ request_data=data,
5353
  )
5354
  )
5355
 
 
9737
  app.include_router(router)
9738
  app.include_router(fine_tuning_router)
9739
  app.include_router(vertex_router)
9740
+ app.include_router(gemini_router)
9741
  app.include_router(pass_through_router)
9742
  app.include_router(health_router)
9743
  app.include_router(key_management_router)
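
Because request_data is now passed into get_custom_headers, successful responses can expose the per-model remaining-limit values as response headers. A hedged client-side sketch; the proxy URL, the virtual key, and the presence of the headers (only set when the key has model_tpm_limit / model_rpm_limit configured) are assumptions:

import httpx

resp = httpx.post(
    "http://localhost:4000/chat/completions",              # assumed local proxy
    headers={"Authorization": "Bearer sk-my-virtual-key"},  # hypothetical virtual key
    json={"model": "gpt-4", "messages": [{"role": "user", "content": "hi"}]},
)
resp.raise_for_status()

# present only when per-model limits are set on the key
print(resp.headers.get("x-litellm-key-remaining-requests-gpt-4"))
print(resp.headers.get("x-litellm-key-remaining-tokens-gpt-4"))
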
.venv/lib/python3.12/site-packages/litellm/proxy/vertex_ai_endpoints/google_ai_studio_endpoints.py ADDED
@@ -0,0 +1,138 @@
1
+ """
2
+ What is this?
3
+
4
+ Google AI Studio Pass-Through Endpoints
5
+ """
6
+
7
+ """
8
+ 1. Create pass-through endpoints for any LITELLM_BASE_URL/gemini/<endpoint> map to https://generativelanguage.googleapis.com/<endpoint>
9
+ """
10
+
11
+ import ast
12
+ import asyncio
13
+ import traceback
14
+ from datetime import datetime, timedelta, timezone
15
+ from typing import List, Optional
16
+ from urllib.parse import urlencode
17
+
18
+ import fastapi
19
+ import httpx
20
+ from fastapi import (
21
+ APIRouter,
22
+ Depends,
23
+ File,
24
+ Form,
25
+ Header,
26
+ HTTPException,
27
+ Request,
28
+ Response,
29
+ UploadFile,
30
+ status,
31
+ )
32
+ from starlette.datastructures import QueryParams
33
+
34
+ import litellm
35
+ from litellm._logging import verbose_proxy_logger
36
+ from litellm.batches.main import FileObject
37
+ from litellm.fine_tuning.main import vertex_fine_tuning_apis_instance
38
+ from litellm.proxy._types import *
39
+ from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
40
+ from litellm.proxy.pass_through_endpoints.pass_through_endpoints import (
41
+ create_pass_through_route,
42
+ )
43
+
44
+ router = APIRouter()
45
+ default_vertex_config = None
46
+
47
+
48
+ @router.api_route("/gemini/{endpoint:path}", methods=["GET", "POST", "PUT", "DELETE"])
49
+ async def gemini_proxy_route(
50
+ endpoint: str,
51
+ request: Request,
52
+ fastapi_response: Response,
53
+ ):
54
+ ## CHECK FOR LITELLM API KEY IN THE QUERY PARAMS - ?..key=LITELLM_API_KEY
55
+ api_key = request.query_params.get("key")
56
+
57
+ user_api_key_dict = await user_api_key_auth(
58
+ request=request, api_key="Bearer {}".format(api_key)
59
+ )
60
+
61
+ base_target_url = "https://generativelanguage.googleapis.com"
62
+ encoded_endpoint = httpx.URL(endpoint).path
63
+
64
+ # Ensure endpoint starts with '/' for proper URL construction
65
+ if not encoded_endpoint.startswith("/"):
66
+ encoded_endpoint = "/" + encoded_endpoint
67
+
68
+ # Construct the full target URL using httpx
69
+ base_url = httpx.URL(base_target_url)
70
+ updated_url = base_url.copy_with(path=encoded_endpoint)
71
+
72
+ # Add or update query parameters
73
+ gemini_api_key = litellm.utils.get_secret(secret_name="GEMINI_API_KEY")
74
+ # Merge query parameters, giving precedence to those in updated_url
75
+ merged_params = dict(request.query_params)
76
+ merged_params.update({"key": gemini_api_key})
77
+
78
+ ## check for streaming
79
+ is_streaming_request = False
80
+ if "stream" in str(updated_url):
81
+ is_streaming_request = True
82
+
83
+ ## CREATE PASS-THROUGH
84
+ endpoint_func = create_pass_through_route(
85
+ endpoint=endpoint,
86
+ target=str(updated_url),
87
+ ) # dynamically construct pass-through endpoint based on incoming path
88
+ received_value = await endpoint_func(
89
+ request,
90
+ fastapi_response,
91
+ user_api_key_dict,
92
+ query_params=merged_params,
93
+ stream=is_streaming_request,
94
+ )
95
+
96
+ return received_value
97
+
98
+
99
+ @router.api_route("/cohere/{endpoint:path}", methods=["GET", "POST", "PUT", "DELETE"])
100
+ async def cohere_proxy_route(
101
+ endpoint: str,
102
+ request: Request,
103
+ fastapi_response: Response,
104
+ user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
105
+ ):
106
+ base_target_url = "https://api.cohere.com"
107
+ encoded_endpoint = httpx.URL(endpoint).path
108
+
109
+ # Ensure endpoint starts with '/' for proper URL construction
110
+ if not encoded_endpoint.startswith("/"):
111
+ encoded_endpoint = "/" + encoded_endpoint
112
+
113
+ # Construct the full target URL using httpx
114
+ base_url = httpx.URL(base_target_url)
115
+ updated_url = base_url.copy_with(path=encoded_endpoint)
116
+
117
+ # Add or update query parameters
118
+ cohere_api_key = litellm.utils.get_secret(secret_name="COHERE_API_KEY")
119
+
120
+ ## check for streaming
121
+ is_streaming_request = False
122
+ if "stream" in str(updated_url):
123
+ is_streaming_request = True
124
+
125
+ ## CREATE PASS-THROUGH
126
+ endpoint_func = create_pass_through_route(
127
+ endpoint=endpoint,
128
+ target=str(updated_url),
129
+ custom_headers={"Authorization": "Bearer {}".format(cohere_api_key)},
130
+ ) # dynamically construct pass-through endpoint based on incoming path
131
+ received_value = await endpoint_func(
132
+ request,
133
+ fastapi_response,
134
+ user_api_key_dict,
135
+ stream=is_streaming_request,
136
+ )
137
+
138
+ return received_value
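
The new router exposes LITELLM_BASE_URL/gemini/<endpoint> and rewrites the "key" query parameter to the server-side GEMINI_API_KEY before forwarding to generativelanguage.googleapis.com. A hedged example call; the local proxy URL and virtual key are assumptions, and the endpoint path follows Google's generateContent REST shape:

import httpx

resp = httpx.post(
    "http://localhost:4000/gemini/v1beta/models/gemini-1.5-flash:generateContent",
    params={"key": "sk-my-litellm-key"},  # litellm virtual key; swapped for GEMINI_API_KEY server-side
    json={"contents": [{"parts": [{"text": "Say hello"}]}]},
)
print(resp.status_code)
print(resp.json())
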
.venv/lib/python3.12/site-packages/litellm/router.py CHANGED
@@ -421,6 +421,7 @@ class Router:
421
  routing_strategy=routing_strategy,
422
  routing_strategy_args=routing_strategy_args,
423
  )
 
424
  ## USAGE TRACKING ##
425
  if isinstance(litellm._async_success_callback, list):
426
  litellm._async_success_callback.append(self.deployment_callback_on_success)
@@ -4116,6 +4117,22 @@ class Router:
4116
  return self.model_list
4117
  return None
4118

 
4119
  def get_settings(self):
4120
  """
4121
  Get router settings method, returns a dictionary of the settings and their values.
 
421
  routing_strategy=routing_strategy,
422
  routing_strategy_args=routing_strategy_args,
423
  )
424
+ self.access_groups = None
425
  ## USAGE TRACKING ##
426
  if isinstance(litellm._async_success_callback, list):
427
  litellm._async_success_callback.append(self.deployment_callback_on_success)
 
4117
  return self.model_list
4118
  return None
4119
 
4120
+ def get_model_access_groups(self):
4121
+ from collections import defaultdict
4122
+
4123
+ access_groups = defaultdict(list)
4124
+ if self.access_groups:
4125
+ return self.access_groups
4126
+
4127
+ if self.model_list:
4128
+ for m in self.model_list:
4129
+ for group in m.get("model_info", {}).get("access_groups", []):
4130
+ model_name = m["model_name"]
4131
+ access_groups[group].append(model_name)
4132
+ # set access groups
4133
+ self.access_groups = access_groups
4134
+ return access_groups
4135
+
4136
  def get_settings(self):
4137
  """
4138
  Get router settings method, returns a dictionary of the settings and their values.
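
get_model_access_groups above inverts each deployment's model_info.access_groups into a group-to-model-names map and memoizes it on the router. The grouping itself is small enough to sketch standalone over a plain model list:

from collections import defaultdict

model_list = [
    {"model_name": "gpt-4o", "model_info": {"access_groups": ["beta-models"]}},
    {"model_name": "gemini-pro-vision", "model_info": {"access_groups": ["beta-models"]}},
    {"model_name": "gpt-3.5-turbo", "model_info": {}},  # not in any access group
]

access_groups: dict = defaultdict(list)
for m in model_list:
    for group in m.get("model_info", {}).get("access_groups", []):
        access_groups[group].append(m["model_name"])

print(dict(access_groups))
# -> {'beta-models': ['gpt-4o', 'gemini-pro-vision']}
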
.venv/lib/python3.12/site-packages/litellm/tests/test_anthropic_completion.py CHANGED
@@ -10,6 +10,7 @@ from dotenv import load_dotenv
10
 
11
  import litellm.types
12
  import litellm.types.utils
 
13
 
14
  load_dotenv()
15
  import io
@@ -150,6 +151,74 @@ def test_anthropic_completion_e2e(stream):
150
  assert message_stop_received is True
151
 
152

 
153
  @pytest.mark.asyncio
154
  async def test_anthropic_router_completion_e2e():
155
  litellm.set_verbose = True
@@ -275,4 +344,4 @@ def test_anthropic_tool_calling_translation():
275
  print(translated_params["messages"])
276
 
277
  assert len(translated_params["messages"]) > 0
278
- assert translated_params["messages"][0]["role"] == "user"
 
10
 
11
  import litellm.types
12
  import litellm.types.utils
13
+ from litellm.llms.anthropic import ModelResponseIterator
14
 
15
  load_dotenv()
16
  import io
 
151
  assert message_stop_received is True
152
 
153
 
154
+ anthropic_chunk_list = [
155
+ {"type": "content_block_start", "index": 0, "content_block": {"type": "text", "text": ""}},
156
+ {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": "To"}},
157
+ {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": " answer"}},
158
+ {"type": "content_block_delta", "index": 0,
159
+ "delta": {"type": "text_delta", "text": " your question about the weather"}},
160
+ {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": " in Boston and Los"}},
161
+ {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": " Angeles today, I'll"}},
162
+ {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": " need to"}},
163
+ {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": " use"}},
164
+ {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": " the"}},
165
+ {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": " get_current_weather"}},
166
+ {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": " function"}},
167
+ {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": " for"}},
168
+ {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": " both"}},
169
+ {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": " cities"}},
170
+ {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": ". Let"}},
171
+ {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": " me fetch"}},
172
+ {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": " that"}},
173
+ {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": " information"}},
174
+ {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": " for"}},
175
+ {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": " you."}},
176
+ {"type": "content_block_stop", "index": 0},
177
+ {"type": "content_block_start", "index": 1,
178
+ "content_block": {"type": "tool_use", "id": "toolu_12345", "name": "get_current_weather", "input": {}}},
179
+ {"type": "content_block_delta", "index": 1, "delta": {"type": "input_json_delta", "partial_json": ""}},
180
+ {"type": "content_block_delta", "index": 1, "delta": {"type": "input_json_delta", "partial_json": "{\"locat"}},
181
+ {"type": "content_block_delta", "index": 1, "delta": {"type": "input_json_delta", "partial_json": "ion\": \"Bos"}},
182
+ {"type": "content_block_delta", "index": 1, "delta": {"type": "input_json_delta", "partial_json": "ton, MA\"}"}},
183
+ {"type": "content_block_stop", "index": 1},
184
+ {"type": "content_block_start", "index": 2,
185
+ "content_block": {"type": "tool_use", "id": "toolu_023423423", "name": "get_current_weather", "input": {}}},
186
+ {"type": "content_block_delta", "index": 2, "delta": {"type": "input_json_delta", "partial_json": ""}},
187
+ {"type": "content_block_delta", "index": 2, "delta": {"type": "input_json_delta", "partial_json": "{\"l"}},
188
+ {"type": "content_block_delta", "index": 2, "delta": {"type": "input_json_delta", "partial_json": "oca"}},
189
+ {"type": "content_block_delta", "index": 2, "delta": {"type": "input_json_delta", "partial_json": "tio"}},
190
+ {"type": "content_block_delta", "index": 2, "delta": {"type": "input_json_delta", "partial_json": "n\": \"Lo"}},
191
+ {"type": "content_block_delta", "index": 2, "delta": {"type": "input_json_delta", "partial_json": "s Angel"}},
192
+ {"type": "content_block_delta", "index": 2, "delta": {"type": "input_json_delta", "partial_json": "es, CA\"}"}},
193
+ {"type": "content_block_stop", "index": 2},
194
+ {"type": "message_delta", "delta": {"stop_reason": "tool_use", "stop_sequence": None},
195
+ "usage": {"output_tokens": 137}},
196
+ {"type": "message_stop"}
197
+ ]
198
+
199
+
200
+ def test_anthropic_tool_streaming():
201
+ """
202
+ OpenAI starts tool_use indexes at 0 for the first tool, regardless of preceding text.
203
+
204
+ Anthropic gives tool_use indexes starting at the first chunk, meaning they often start at 1
205
+ when they should start at 0
206
+ """
207
+ litellm.set_verbose = True
208
+ response_iter = ModelResponseIterator([], False)
209
+
210
+ # First index is 0, we'll start earlier because incrementing is easier
211
+ correct_tool_index = -1
212
+ for chunk in anthropic_chunk_list:
213
+ parsed_chunk = response_iter.chunk_parser(chunk)
214
+ if tool_use := parsed_chunk.get('tool_use'):
215
+
216
+ # We only increment when a new block starts
217
+ if tool_use.get('id') is not None:
218
+ correct_tool_index += 1
219
+ assert tool_use['index'] == correct_tool_index
220
+
221
+
222
  @pytest.mark.asyncio
223
  async def test_anthropic_router_completion_e2e():
224
  litellm.set_verbose = True
 
344
  print(translated_params["messages"])
345
 
346
  assert len(translated_params["messages"]) > 0
347
+ assert translated_params["messages"][0]["role"] == "user"
.venv/lib/python3.12/site-packages/litellm/tests/test_key_generate_prisma.py CHANGED
@@ -2710,3 +2710,168 @@ async def test_custom_api_key_header_name(prisma_client):
2710
  pass
2711
 
2712
    # this should pass because X-Litellm-Key is valid
2710
  pass
2711
 
2712
  # this should pass because X-Litellm-Key is valid
2713
+
2714
+
2715
+ @pytest.mark.asyncio()
2716
+ async def test_generate_key_with_model_tpm_limit(prisma_client):
2717
+ print("prisma client=", prisma_client)
2718
+
2719
+ setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
2720
+ setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
2721
+ await litellm.proxy.proxy_server.prisma_client.connect()
2722
+ request = GenerateKeyRequest(
2723
+ metadata={
2724
+ "team": "litellm-team3",
2725
+ "model_tpm_limit": {"gpt-4": 100},
2726
+ "model_rpm_limit": {"gpt-4": 2},
2727
+ }
2728
+ )
2729
+ key = await generate_key_fn(
2730
+ data=request,
2731
+ user_api_key_dict=UserAPIKeyAuth(
2732
+ user_role=LitellmUserRoles.PROXY_ADMIN,
2733
+ api_key="sk-1234",
2734
+ user_id="1234",
2735
+ ),
2736
+ )
2737
+ print(key)
2738
+
2739
+ generated_key = key.key
2740
+
2741
+ # use generated key to auth in
2742
+ result = await info_key_fn(key=generated_key)
2743
+ print("result from info_key_fn", result)
2744
+ assert result["key"] == generated_key
2745
+ print("\n info for key=", result["info"])
2746
+ assert result["info"]["metadata"] == {
2747
+ "team": "litellm-team3",
2748
+ "model_tpm_limit": {"gpt-4": 100},
2749
+ "model_rpm_limit": {"gpt-4": 2},
2750
+ }
2751
+
2752
+ # Update model tpm_limit and rpm_limit
2753
+ request = UpdateKeyRequest(
2754
+ key=generated_key,
2755
+ model_tpm_limit={"gpt-4": 200},
2756
+ model_rpm_limit={"gpt-4": 3},
2757
+ )
2758
+ _request = Request(scope={"type": "http"})
2759
+ _request._url = URL(url="/update/key")
2760
+
2761
+ await update_key_fn(data=request, request=_request)
2762
+ result = await info_key_fn(key=generated_key)
2763
+ print("result from info_key_fn", result)
2764
+ assert result["key"] == generated_key
2765
+ print("\n info for key=", result["info"])
2766
+ assert result["info"]["metadata"] == {
2767
+ "team": "litellm-team3",
2768
+ "model_tpm_limit": {"gpt-4": 200},
2769
+ "model_rpm_limit": {"gpt-4": 3},
2770
+ }
2771
+
2772
+
2773
+ @pytest.mark.asyncio()
2774
+ async def test_team_access_groups(prisma_client):
2775
+ """
2776
+ Test team based model access groups
2777
+
2778
+ - Test calling a model in the access group -> pass
2779
+ - Test calling a model not in the access group -> fail
2780
+ """
2781
+ litellm.set_verbose = True
2782
+ setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
2783
+ setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
2784
+ await litellm.proxy.proxy_server.prisma_client.connect()
2785
+ # create router with access groups
2786
+ litellm_router = litellm.Router(
2787
+ model_list=[
2788
+ {
2789
+ "model_name": "gemini-pro-vision",
2790
+ "litellm_params": {
2791
+ "model": "vertex_ai/gemini-1.0-pro-vision-001",
2792
+ },
2793
+ "model_info": {"access_groups": ["beta-models"]},
2794
+ },
2795
+ {
2796
+ "model_name": "gpt-4o",
2797
+ "litellm_params": {
2798
+ "model": "gpt-4o",
2799
+ },
2800
+ "model_info": {"access_groups": ["beta-models"]},
2801
+ },
2802
+ ]
2803
+ )
2804
+ setattr(litellm.proxy.proxy_server, "llm_router", litellm_router)
2805
+
2806
+ # Create team with models=["beta-models"]
2807
+ team_request = NewTeamRequest(
2808
+ team_alias="testing-team",
2809
+ models=["beta-models"],
2810
+ )
2811
+
2812
+ new_team_response = await new_team(
2813
+ data=team_request,
2814
+ user_api_key_dict=UserAPIKeyAuth(user_role=LitellmUserRoles.PROXY_ADMIN),
2815
+ http_request=Request(scope={"type": "http"}),
2816
+ )
2817
+ print("new_team_response", new_team_response)
2818
+ created_team_id = new_team_response["team_id"]
2819
+
2820
+ # create key with team_id=created_team_id
2821
+ request = GenerateKeyRequest(
2822
+ team_id=created_team_id,
2823
+ )
2824
+
2825
+ key = await generate_key_fn(
2826
+ data=request,
2827
+ user_api_key_dict=UserAPIKeyAuth(
2828
+ user_role=LitellmUserRoles.PROXY_ADMIN,
2829
+ api_key="sk-1234",
2830
+ user_id="1234",
2831
+ ),
2832
+ )
2833
+ print(key)
2834
+
2835
+ generated_key = key.key
2836
+ bearer_token = "Bearer " + generated_key
2837
+
2838
+ request = Request(scope={"type": "http"})
2839
+ request._url = URL(url="/chat/completions")
2840
+
2841
+ for model in ["gpt-4o", "gemini-pro-vision"]:
2842
+ # Expect these to pass
2843
+ async def return_body():
2844
+ return_string = f'{{"model": "{model}"}}'
2845
+ # return string as bytes
2846
+ return return_string.encode()
2847
+
2848
+ request.body = return_body
2849
+
2850
+ # use generated key to auth in
2851
+ print(
2852
+ "Bearer token being sent to user_api_key_auth() - {}".format(bearer_token)
2853
+ )
2854
+ result = await user_api_key_auth(request=request, api_key=bearer_token)
2855
+
2856
+ for model in ["gpt-4", "gpt-4o-mini", "gemini-experimental"]:
2857
+ # Expect these to fail
2858
+ async def return_body_2():
2859
+ return_string = f'{{"model": "{model}"}}'
2860
+ # return string as bytes
2861
+ return return_string.encode()
2862
+
2863
+ request.body = return_body_2
2864
+
2865
+ # use generated key to auth in
2866
+ print(
2867
+ "Bearer token being sent to user_api_key_auth() - {}".format(bearer_token)
2868
+ )
2869
+ try:
2870
+ result = await user_api_key_auth(request=request, api_key=bearer_token)
2871
+ pytest.fail(f"This should have failed!. IT's an invalid model")
2872
+ except Exception as e:
2873
+ print("got exception", e)
2874
+ assert (
2875
+ "not allowed to call model" in e.message
2876
+ and "Allowed team models" in e.message
2877
+ )
.venv/lib/python3.12/site-packages/litellm/tests/test_least_busy_routing.py CHANGED
@@ -1,8 +1,13 @@
1
  #### What this tests ####
2
  # This tests the router's ability to identify the least busy deployment
3
 
4
- import sys, os, asyncio, time, random
5
  import traceback
 
6
  from dotenv import load_dotenv
7
 
8
  load_dotenv()
@@ -12,10 +17,11 @@ sys.path.insert(
12
  0, os.path.abspath("../..")
13
  ) # Adds the parent directory to the system path
14
  import pytest
15
- from litellm import Router
16
  import litellm
17
- from litellm.router_strategy.least_busy import LeastBusyLoggingHandler
18
  from litellm.caching import DualCache
 
19
 
20
  ### UNIT TESTS FOR LEAST BUSY LOGGING ###
21
 
@@ -123,6 +129,9 @@ def test_router_get_available_deployments():
123
 
124
  return_dict = router.cache.get_cache(key=cache_key)
125
 
126
  assert router.leastbusy_logger.logged_success == 1
127
  assert return_dict[1] == 10
128
  assert return_dict[2] == 54
 
1
  #### What this tests ####
2
  # This tests the router's ability to identify the least busy deployment
3
 
4
+ import asyncio
5
+ import os
6
+ import random
7
+ import sys
8
+ import time
9
  import traceback
10
+
11
  from dotenv import load_dotenv
12
 
13
  load_dotenv()
 
17
  0, os.path.abspath("../..")
18
  ) # Adds the parent directory to the system path
19
  import pytest
20
+
21
  import litellm
22
+ from litellm import Router
23
  from litellm.caching import DualCache
24
+ from litellm.router_strategy.least_busy import LeastBusyLoggingHandler
25
 
26
  ### UNIT TESTS FOR LEAST BUSY LOGGING ###
27
 
 
129
 
130
  return_dict = router.cache.get_cache(key=cache_key)
131
 
132
+ # wait 2 seconds
133
+ time.sleep(2)
134
+
135
  assert router.leastbusy_logger.logged_success == 1
136
  assert return_dict[1] == 10
137
  assert return_dict[2] == 54
.venv/lib/python3.12/site-packages/litellm/tests/test_parallel_request_limiter.py CHANGED
@@ -908,3 +908,273 @@ async def test_bad_router_tpm_limit():
908
  )["current_tpm"]
909
  == 0
910
  )
 
908
  )["current_tpm"]
909
  == 0
910
  )
911
+
912
+
913
+ @pytest.mark.asyncio
914
+ async def test_bad_router_tpm_limit_per_model():
915
+ model_list = [
916
+ {
917
+ "model_name": "azure-model",
918
+ "litellm_params": {
919
+ "model": "azure/gpt-turbo",
920
+ "api_key": "os.environ/AZURE_FRANCE_API_KEY",
921
+ "api_base": "https://openai-france-1234.openai.azure.com",
922
+ "rpm": 1440,
923
+ },
924
+ "model_info": {"id": 1},
925
+ },
926
+ {
927
+ "model_name": "azure-model",
928
+ "litellm_params": {
929
+ "model": "azure/gpt-35-turbo",
930
+ "api_key": "os.environ/AZURE_EUROPE_API_KEY",
931
+ "api_base": "https://my-endpoint-europe-berri-992.openai.azure.com",
932
+ "rpm": 6,
933
+ },
934
+ "model_info": {"id": 2},
935
+ },
936
+ ]
937
+ router = Router(
938
+ model_list=model_list,
939
+ set_verbose=False,
940
+ num_retries=3,
941
+ ) # type: ignore
942
+
943
+ _api_key = "sk-12345"
944
+ _api_key = hash_token(_api_key)
945
+ model = "azure-model"
946
+
947
+ user_api_key_dict = UserAPIKeyAuth(
948
+ api_key=_api_key,
949
+ max_parallel_requests=10,
950
+ tpm_limit=10,
951
+ metadata={
952
+ "model_rpm_limit": {model: 5},
953
+ "model_tpm_limit": {model: 5},
954
+ },
955
+ )
956
+ local_cache = DualCache()
957
+ pl = ProxyLogging(user_api_key_cache=local_cache)
958
+ pl._init_litellm_callbacks()
959
+ print(f"litellm callbacks: {litellm.callbacks}")
960
+ parallel_request_handler = pl.max_parallel_request_limiter
961
+
962
+ await parallel_request_handler.async_pre_call_hook(
963
+ user_api_key_dict=user_api_key_dict,
964
+ cache=local_cache,
965
+ data={"model": model},
966
+ call_type="",
967
+ )
968
+
969
+ current_date = datetime.now().strftime("%Y-%m-%d")
970
+ current_hour = datetime.now().strftime("%H")
971
+ current_minute = datetime.now().strftime("%M")
972
+ precise_minute = f"{current_date}-{current_hour}-{current_minute}"
973
+ request_count_api_key = f"{_api_key}::{model}::{precise_minute}::request_count"
974
+
975
+ print(
976
+ "internal usage cache: ",
977
+ parallel_request_handler.internal_usage_cache.in_memory_cache.cache_dict,
978
+ )
979
+
980
+ assert (
981
+ parallel_request_handler.internal_usage_cache.get_cache(
982
+ key=request_count_api_key
983
+ )["current_requests"]
984
+ == 1
985
+ )
986
+
987
+ # bad call
988
+ try:
989
+ response = await router.acompletion(
990
+ model=model,
991
+ messages=[{"role": "user2", "content": "Write me a paragraph on the moon"}],
992
+ stream=True,
993
+ metadata={"user_api_key": _api_key},
994
+ )
995
+ except:
996
+ pass
997
+ await asyncio.sleep(1) # success is done in a separate thread
998
+
999
+ assert (
1000
+ parallel_request_handler.internal_usage_cache.get_cache(
1001
+ key=request_count_api_key
1002
+ )["current_tpm"]
1003
+ == 0
1004
+ )
1005
+
1006
+
1007
+ @pytest.mark.asyncio
1008
+ async def test_pre_call_hook_rpm_limits_per_model():
1009
+ """
1010
+ Test if error raised on hitting rpm limits for a given model
1011
+ """
1012
+ import logging
1013
+
1014
+ from litellm._logging import (
1015
+ verbose_logger,
1016
+ verbose_proxy_logger,
1017
+ verbose_router_logger,
1018
+ )
1019
+
1020
+ verbose_logger.setLevel(logging.DEBUG)
1021
+ verbose_proxy_logger.setLevel(logging.DEBUG)
1022
+ verbose_router_logger.setLevel(logging.DEBUG)
1023
+
1024
+ _api_key = "sk-12345"
1025
+ _api_key = hash_token(_api_key)
1026
+ user_api_key_dict = UserAPIKeyAuth(
1027
+ api_key=_api_key,
1028
+ max_parallel_requests=100,
1029
+ tpm_limit=900000,
1030
+ rpm_limit=100000,
1031
+ metadata={
1032
+ "model_rpm_limit": {"azure-model": 1},
1033
+ },
1034
+ )
1035
+ local_cache = DualCache()
1036
+ pl = ProxyLogging(user_api_key_cache=local_cache)
1037
+ pl._init_litellm_callbacks()
1038
+ print(f"litellm callbacks: {litellm.callbacks}")
1039
+ parallel_request_handler = pl.max_parallel_request_limiter
1040
+
1041
+ await parallel_request_handler.async_pre_call_hook(
1042
+ user_api_key_dict=user_api_key_dict, cache=local_cache, data={}, call_type=""
1043
+ )
1044
+
1045
+ model = "azure-model"
1046
+
1047
+ kwargs = {
1048
+ "model": model,
1049
+ "litellm_params": {
1050
+ "metadata": {"user_api_key": _api_key, "model_group": model}
1051
+ },
1052
+ }
1053
+
1054
+ await parallel_request_handler.async_log_success_event(
1055
+ kwargs=kwargs,
1056
+ response_obj="",
1057
+ start_time="",
1058
+ end_time="",
1059
+ )
1060
+
1061
+ ## Expected cache val: {"current_requests": 0, "current_tpm": 0, "current_rpm": 1}
1062
+
1063
+ try:
1064
+ await parallel_request_handler.async_pre_call_hook(
1065
+ user_api_key_dict=user_api_key_dict,
1066
+ cache=local_cache,
1067
+ data={"model": model},
1068
+ call_type="",
1069
+ )
1070
+
1071
+ pytest.fail(f"Expected call to fail")
1072
+ except Exception as e:
1073
+ assert e.status_code == 429
1074
+ print("got error=", e)
1075
+ assert (
1076
+ "limit reached Hit RPM limit for model: azure-model on api_key: c11e7177eb60c80cf983ddf8ca98f2dc1272d4c612204ce9bedd2460b18939cc"
1077
+ in str(e)
1078
+ )
1079
+
1080
+
1081
+ @pytest.mark.asyncio
1082
+ async def test_pre_call_hook_tpm_limits_per_model():
1083
+ """
1084
+ Test if error raised on hitting tpm limits for a given model
1085
+ """
1086
+ import logging
1087
+
1088
+ from litellm._logging import (
1089
+ verbose_logger,
1090
+ verbose_proxy_logger,
1091
+ verbose_router_logger,
1092
+ )
1093
+
1094
+ verbose_logger.setLevel(logging.DEBUG)
1095
+ verbose_proxy_logger.setLevel(logging.DEBUG)
1096
+ verbose_router_logger.setLevel(logging.DEBUG)
1097
+
1098
+ _api_key = "sk-12345"
1099
+ _api_key = hash_token(_api_key)
1100
+ user_api_key_dict = UserAPIKeyAuth(
1101
+ api_key=_api_key,
1102
+ max_parallel_requests=100,
1103
+ tpm_limit=900000,
1104
+ rpm_limit=100000,
1105
+ metadata={
1106
+ "model_tpm_limit": {"azure-model": 1},
1107
+ "model_rpm_limit": {"azure-model": 100},
1108
+ },
1109
+ )
1110
+ local_cache = DualCache()
1111
+ pl = ProxyLogging(user_api_key_cache=local_cache)
1112
+ pl._init_litellm_callbacks()
1113
+ print(f"litellm callbacks: {litellm.callbacks}")
1114
+ parallel_request_handler = pl.max_parallel_request_limiter
1115
+ model = "azure-model"
1116
+
1117
+ await parallel_request_handler.async_pre_call_hook(
1118
+ user_api_key_dict=user_api_key_dict,
1119
+ cache=local_cache,
1120
+ data={"model": model},
1121
+ call_type="",
1122
+ )
1123
+
1124
+ kwargs = {
1125
+ "model": model,
1126
+ "litellm_params": {
1127
+ "metadata": {"user_api_key": _api_key, "model_group": model}
1128
+ },
1129
+ }
1130
+
1131
+ await parallel_request_handler.async_log_success_event(
1132
+ kwargs=kwargs,
1133
+ response_obj=litellm.ModelResponse(usage=litellm.Usage(total_tokens=11)),
1134
+ start_time="",
1135
+ end_time="",
1136
+ )
1137
+
1138
+ current_date = datetime.now().strftime("%Y-%m-%d")
1139
+ current_hour = datetime.now().strftime("%H")
1140
+ current_minute = datetime.now().strftime("%M")
1141
+ precise_minute = f"{current_date}-{current_hour}-{current_minute}"
1142
+ request_count_api_key = f"{_api_key}::{model}::{precise_minute}::request_count"
1143
+
1144
+ print(
1145
+ "internal usage cache: ",
1146
+ parallel_request_handler.internal_usage_cache.in_memory_cache.cache_dict,
1147
+ )
1148
+
1149
+ assert (
1150
+ parallel_request_handler.internal_usage_cache.get_cache(
1151
+ key=request_count_api_key
1152
+ )["current_tpm"]
1153
+ == 11
1154
+ )
1155
+
1156
+ assert (
1157
+ parallel_request_handler.internal_usage_cache.get_cache(
1158
+ key=request_count_api_key
1159
+ )["current_rpm"]
1160
+ == 1
1161
+ )
1162
+
1163
+ ## Expected cache val: {"current_requests": 0, "current_tpm": 11, "current_rpm": "1"}
1164
+
1165
+ try:
1166
+ await parallel_request_handler.async_pre_call_hook(
1167
+ user_api_key_dict=user_api_key_dict,
1168
+ cache=local_cache,
1169
+ data={"model": model},
1170
+ call_type="",
1171
+ )
1172
+
1173
+ pytest.fail(f"Expected call to fail")
1174
+ except Exception as e:
1175
+ assert e.status_code == 429
1176
+ print("got error=", e)
1177
+ assert (
1178
+ "request limit reached Hit TPM limit for model: azure-model on api_key"
1179
+ in str(e)
1180
+ )
.venv/lib/python3.12/site-packages/litellm/tests/test_pass_through_endpoints.py CHANGED
@@ -11,6 +11,7 @@ sys.path.insert(
11
  ) # Adds the parent directory to the system path
12
 
13
  import asyncio
 
14
 
15
  import httpx
16
 
@@ -19,7 +20,9 @@ from litellm.proxy.proxy_server import app, initialize_pass_through_endpoints
19
 
20
  # Mock the async_client used in the pass_through_request function
21
  async def mock_request(*args, **kwargs):
22
- return httpx.Response(200, json={"message": "Mocked response"})
 
 
23
 
24
 
25
  @pytest.fixture
 
11
  ) # Adds the parent directory to the system path
12
 
13
  import asyncio
14
+ from unittest.mock import Mock
15
 
16
  import httpx
17
 
 
20
 
21
  # Mock the async_client used in the pass_through_request function
22
  async def mock_request(*args, **kwargs):
23
+ mock_response = httpx.Response(200, json={"message": "Mocked response"})
24
+ mock_response.request = Mock(spec=httpx.Request)
25
+ return mock_response
26
 
27
 
28
  @pytest.fixture
.venv/lib/python3.12/site-packages/litellm/tests/test_proxy_server.py CHANGED
@@ -1166,3 +1166,52 @@ async def test_add_callback_via_key_litellm_pre_call_utils(prisma_client):
1166
  assert new_data["success_callback"] == ["langfuse"]
1167
  assert "langfuse_public_key" in new_data
1168
  assert "langfuse_secret_key" in new_data
 
1166
  assert new_data["success_callback"] == ["langfuse"]
1167
  assert "langfuse_public_key" in new_data
1168
  assert "langfuse_secret_key" in new_data
1169
+
1170
+
1171
+ @pytest.mark.asyncio
1172
+ async def test_gemini_pass_through_endpoint():
1173
+ from starlette.datastructures import URL
1174
+
1175
+ from litellm.proxy.vertex_ai_endpoints.google_ai_studio_endpoints import (
1176
+ Request,
1177
+ Response,
1178
+ gemini_proxy_route,
1179
+ )
1180
+
1181
+ body = b"""
1182
+ {
1183
+ "contents": [{
1184
+ "parts":[{
1185
+ "text": "The quick brown fox jumps over the lazy dog."
1186
+ }]
1187
+ }]
1188
+ }
1189
+ """
1190
+
1191
+ # Construct the scope dictionary
1192
+ scope = {
1193
+ "type": "http",
1194
+ "method": "POST",
1195
+ "path": "/gemini/v1beta/models/gemini-1.5-flash:countTokens",
1196
+ "query_string": b"key=sk-1234",
1197
+ "headers": [
1198
+ (b"content-type", b"application/json"),
1199
+ ],
1200
+ }
1201
+
1202
+ # Create a new Request object
1203
+ async def async_receive():
1204
+ return {"type": "http.request", "body": body, "more_body": False}
1205
+
1206
+ request = Request(
1207
+ scope=scope,
1208
+ receive=async_receive,
1209
+ )
1210
+
1211
+ resp = await gemini_proxy_route(
1212
+ endpoint="v1beta/models/gemini-1.5-flash:countTokens?key=sk-1234",
1213
+ request=request,
1214
+ fastapi_response=Response(),
1215
+ )
1216
+
1217
+ print(resp.body)
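test_gemini_pass_through_endpoint above drives the route coroutine directly by hand-building a Starlette Request from an ASGI scope plus a receive callable, so no live server is needed. A minimal, self-contained version of that pattern is sketched below; it uses only the standard Starlette API, and the request body and key are placeholders.

# Sketch: constructing a Starlette Request from a raw ASGI scope for testing.
import asyncio

from starlette.requests import Request


async def main():
    body = b'{"contents": [{"parts": [{"text": "hello"}]}]}'

    async def receive():
        # single-message ASGI receive channel carrying the whole body
        return {"type": "http.request", "body": body, "more_body": False}

    scope = {
        "type": "http",
        "method": "POST",
        "path": "/gemini/v1beta/models/gemini-1.5-flash:countTokens",
        "query_string": b"key=sk-1234",
        "headers": [(b"content-type", b"application/json")],
    }
    request = Request(scope=scope, receive=receive)
    # an endpoint under test would read the body the same way
    print(await request.json())


asyncio.run(main())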
.venv/lib/python3.12/site-packages/naptha_sdk-0.1.0.dist-info/RECORD CHANGED
@@ -1,6 +1,6 @@
1
  ../../../bin/naptha,sha256=caDahce1fdPiWx_e7NogUNj_x-F3pfMRk20TJJoEoxg,265
2
  naptha_sdk/agent_service.py,sha256=BtyqmyaxRVN0DeSUMgIO8XBLbMe2vGBEpwNcdQbLXgk,2538
3
- naptha_sdk/agent_service_engine.py,sha256=mhoVx1PEqPrsP87EPqea_k4awHFBybP26xK7kDoDlTA,6604
4
  naptha_sdk/app.py,sha256=CpI8ZCnt4nKBkNujORSAeOLJIKuInqrKtl6IOeynZXo,4116
5
  naptha_sdk/client/comms/http_client.py,sha256=JKz-EUHKVIIAaFDB3w5WHJ_Bv4og4YDGBXGglyDMFSE,8390
6
  naptha_sdk/client/comms/ws_client.py,sha256=KPSNNQ0pwaJBRUNPBZ4WAREqSo2QFejjtfXYaLAzMs0,10577
 
1
  ../../../bin/naptha,sha256=caDahce1fdPiWx_e7NogUNj_x-F3pfMRk20TJJoEoxg,265
2
  naptha_sdk/agent_service.py,sha256=BtyqmyaxRVN0DeSUMgIO8XBLbMe2vGBEpwNcdQbLXgk,2538
3
+ naptha_sdk/agent_service_engine.py,sha256=vfHDoYNM1lP8hvQ5SmTP9qhKiqlyaLAktqsgIzw8mgU,6605
4
  naptha_sdk/app.py,sha256=CpI8ZCnt4nKBkNujORSAeOLJIKuInqrKtl6IOeynZXo,4116
5
  naptha_sdk/client/comms/http_client.py,sha256=JKz-EUHKVIIAaFDB3w5WHJ_Bv4og4YDGBXGglyDMFSE,8390
6
  naptha_sdk/client/comms/ws_client.py,sha256=KPSNNQ0pwaJBRUNPBZ4WAREqSo2QFejjtfXYaLAzMs0,10577
.venv/lib/python3.12/site-packages/naptha_sdk-0.1.0.dist-info/direct_url.json CHANGED
@@ -1 +1 @@
1
- {"url": "https://github.com/NapthaAI/naptha-sdk.git", "vcs_info": {"vcs": "git", "requested_revision": "feat/single-file", "commit_id": "d3a067838b217740a1c513a9565b1cb98d92fc5b"}}
 
1
+ {"url": "https://github.com/NapthaAI/naptha-sdk.git", "vcs_info": {"vcs": "git", "requested_revision": "feat/single-file", "commit_id": "2e4060d3d4fc493f57133b8253ed5c78335e9ef0"}}
.venv/lib/python3.12/site-packages/naptha_sdk/agent_service_engine.py CHANGED
@@ -22,7 +22,7 @@ async def run_agent_service(agent_service, flow_run, parameters) -> None:
22
  await agent_service_engine.complete()
23
  break
24
  time.sleep(3)
25
- return agent_service_engine.agent_serviceresult[-1]
26
  except Exception as e:
27
  logger.error(f"An error occurred: {str(e)}")
28
  await agent_service_engine.fail()
 
22
  await agent_service_engine.complete()
23
  break
24
  time.sleep(3)
25
+ return agent_service_engine.agent_service_result[-1]
26
  except Exception as e:
27
  logger.error(f"An error occurred: {str(e)}")
28
  await agent_service_engine.fail()
.venv/src/naptha-sdk/naptha_sdk/agent_service_engine.py CHANGED
@@ -22,7 +22,7 @@ async def run_agent_service(agent_service, flow_run, parameters) -> None:
22
  await agent_service_engine.complete()
23
  break
24
  time.sleep(3)
25
- return agent_service_engine.agent_serviceresult[-1]
26
  except Exception as e:
27
  logger.error(f"An error occurred: {str(e)}")
28
  await agent_service_engine.fail()
 
22
  await agent_service_engine.complete()
23
  break
24
  time.sleep(3)
25
+ return agent_service_engine.agent_service_result[-1]
26
  except Exception as e:
27
  logger.error(f"An error occurred: {str(e)}")
28
  await agent_service_engine.fail()
poetry.lock CHANGED
@@ -720,13 +720,13 @@ referencing = ">=0.31.0"
720
 
721
  [[package]]
722
  name = "litellm"
723
- version = "1.43.17"
724
  description = "Library to easily interface with LLM API providers"
725
  optional = false
726
  python-versions = "!=2.7.*,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,!=3.7.*,>=3.8"
727
  files = [
728
- {file = "litellm-1.43.17-py3-none-any.whl", hash = "sha256:f5d68c812f087b49266631e09ae78b48b3ea03cd2e04e7760162a5919c5ccec7"},
729
- {file = "litellm-1.43.17.tar.gz", hash = "sha256:8ac82b18bf6ae7c29627e8e5d89b183f075b32fb7027b17d2fb7d7d0b7cf8b7f"},
730
  ]
731
 
732
  [package.dependencies]
@@ -937,7 +937,7 @@ surrealdb = "^0.3.2"
937
  type = "git"
938
  url = "https://github.com/NapthaAI/naptha-sdk.git"
939
  reference = "feat/single-file"
940
- resolved_reference = "d3a067838b217740a1c513a9565b1cb98d92fc5b"
941
 
942
  [[package]]
943
  name = "openai"
@@ -1891,4 +1891,4 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools",
1891
  [metadata]
1892
  lock-version = "2.0"
1893
  python-versions = "^3.12"
1894
- content-hash = "a28a3e26361a34d07cfbc6466d87f5ceeefdc61b5446e19c521c782568dc42bb"
 
720
 
721
  [[package]]
722
  name = "litellm"
723
+ version = "1.43.18"
724
  description = "Library to easily interface with LLM API providers"
725
  optional = false
726
  python-versions = "!=2.7.*,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,!=3.7.*,>=3.8"
727
  files = [
728
+ {file = "litellm-1.43.18-py3-none-any.whl", hash = "sha256:68d853b4a0198a16e2260e4406a20f8d2e59bd903e019b7f3ba5a9f35ecc3e62"},
729
+ {file = "litellm-1.43.18.tar.gz", hash = "sha256:e22b20065b62663dd060be9da1e84ca05903931c41c49d35a98649ed09e79d29"},
730
  ]
731
 
732
  [package.dependencies]
 
937
  type = "git"
938
  url = "https://github.com/NapthaAI/naptha-sdk.git"
939
  reference = "feat/single-file"
940
+ resolved_reference = "2e4060d3d4fc493f57133b8253ed5c78335e9ef0"
941
 
942
  [[package]]
943
  name = "openai"
 
1891
  [metadata]
1892
  lock-version = "2.0"
1893
  python-versions = "^3.12"
1894
+ content-hash = "1cc6d2a25c4edcd20480814792d461fd48fd6f8ee09a6b9d721e77327e6bc9e1"
pyproject.toml CHANGED
@@ -7,8 +7,8 @@ readme = "README.md"
7
 
8
  [tool.poetry.dependencies]
9
  python = "^3.12"
10
- litellm = "^1.43.17"
11
  naptha-sdk = {git = "https://github.com/NapthaAI/naptha-sdk.git", rev = "feat/single-file"}
 
12
 
13
 
14
  [build-system]
 
7
 
8
  [tool.poetry.dependencies]
9
  python = "^3.12"
 
10
  naptha-sdk = {git = "https://github.com/NapthaAI/naptha-sdk.git", rev = "feat/single-file"}
11
+ litellm = "^1.43.18"
12
 
13
 
14
  [build-system]