from __future__ import annotations
import os
from typing import Any
from pydantic import BaseModel, Field, SecretStr
from openhands.core.logger import LOG_DIR


class LLMConfig(BaseModel):
"""Configuration for the LLM model.
Attributes:
model: The model to use.
api_key: The API key to use.
base_url: The base URL for the API. This is necessary for local LLMs. It is also used for Azure embeddings.
api_version: The version of the API.
embedding_model: The embedding model to use.
embedding_base_url: The base URL for the embedding API.
embedding_deployment_name: The name of the deployment for the embedding API. This is used for Azure OpenAI.
aws_access_key_id: The AWS access key ID.
aws_secret_access_key: The AWS secret access key.
aws_region_name: The AWS region name.
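        openrouter_site_url: The site URL to send to OpenRouter (exported as OR_SITE_URL in model_post_init).
        openrouter_app_name: The app name to send to OpenRouter (exported as OR_APP_NAME in model_post_init).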
num_retries: The number of retries to attempt.
retry_multiplier: The multiplier for the exponential backoff.
        retry_min_wait: The minimum time to wait between retries, in seconds. This is the exponential backoff minimum; for models with very low rate limits, this can be set to 15-20.
        retry_max_wait: The maximum time to wait between retries, in seconds. This is the exponential backoff maximum.
timeout: The timeout for the API.
max_message_chars: The approximate max number of characters in the content of an event included in the prompt to the LLM. Larger observations are truncated.
temperature: The temperature for the API.
top_p: The top p for the API.
        custom_llm_provider: The custom LLM provider to use. This is undocumented in OpenHands and normally not used; it is documented on the litellm side.
        max_input_tokens: The maximum number of input tokens. Note that this is currently unused; the value used at runtime is actually the model's total token limit (e.g. 128,000 tokens for GPT-4).
max_output_tokens: The maximum number of output tokens. This is sent to the LLM.
        input_cost_per_token: The cost per input token. This will be available in the logs for the user to check.
        output_cost_per_token: The cost per output token. This will be available in the logs for the user to check.
ollama_base_url: The base URL for the OLLAMA API.
drop_params: Drop any unmapped (unsupported) params without causing an exception.
        modify_params: Allow litellm to transform params, such as adding a default message when a message is empty.
        disable_vision: If the model is vision capable, this option disables image processing (useful for cost reduction).
        reasoning_effort: The effort to put into reasoning. A string, one of 'low', 'medium', 'high', or 'none'. Exclusive to o1 models.
caching_prompt: Use the prompt caching feature if provided by the LLM and supported by the provider.
log_completions: Whether to log LLM completions to the state.
log_completions_folder: The folder to log LLM completions to. Required if log_completions is True.
custom_tokenizer: A custom tokenizer to use for token counting.
native_tool_calling: Whether to use native tool calling if supported by the model. Can be True, False, or not set.
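
    Example:
        A minimal construction sketch; the model name is this class's default
        and the API key is a placeholder, not a real credential:

            config = LLMConfig(
                model='claude-3-5-sonnet-20241022',
                api_key=SecretStr('sk-placeholder'),
                num_retries=4,
            )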
"""
model: str = Field(default='claude-3-5-sonnet-20241022')
api_key: SecretStr | None = Field(default=None)
base_url: str | None = Field(default=None)
api_version: str | None = Field(default=None)
embedding_model: str = Field(default='local')
embedding_base_url: str | None = Field(default=None)
embedding_deployment_name: str | None = Field(default=None)
aws_access_key_id: SecretStr | None = Field(default=None)
aws_secret_access_key: SecretStr | None = Field(default=None)
aws_region_name: str | None = Field(default=None)
openrouter_site_url: str = Field(default='https://docs.all-hands.dev/')
openrouter_app_name: str = Field(default='OpenHands')
num_retries: int = Field(default=8)
retry_multiplier: float = Field(default=2)
retry_min_wait: int = Field(default=15)
retry_max_wait: int = Field(default=120)
timeout: int | None = Field(default=None)
max_message_chars: int = Field(
default=30_000
) # maximum number of characters in an observation's content when sent to the llm
temperature: float = Field(default=0.0)
top_p: float = Field(default=1.0)
custom_llm_provider: str | None = Field(default=None)
max_input_tokens: int | None = Field(default=None)
max_output_tokens: int | None = Field(default=None)
input_cost_per_token: float | None = Field(default=None)
output_cost_per_token: float | None = Field(default=None)
ollama_base_url: str | None = Field(default=None)
# This setting can be sent in each call to litellm
drop_params: bool = Field(default=True)
# Note: this setting is actually global, unlike drop_params
modify_params: bool = Field(default=True)
disable_vision: bool | None = Field(default=None)
caching_prompt: bool = Field(default=True)
log_completions: bool = Field(default=False)
log_completions_folder: str = Field(default=os.path.join(LOG_DIR, 'completions'))
custom_tokenizer: str | None = Field(default=None)
    native_tool_calling: bool | None = Field(default=None)
    reasoning_effort: str | None = Field(default=None)
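    # Reject unknown keys instead of silently ignoring them (e.g. typos in config files).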
    model_config = {'extra': 'forbid'}

    def model_post_init(self, __context: Any) -> None:
"""Post-initialization hook to assign OpenRouter-related variables to environment variables.
This ensures that these values are accessible to litellm at runtime.
"""
super().model_post_init(__context)
# Assign OpenRouter-specific variables to environment variables
if self.openrouter_site_url:
os.environ['OR_SITE_URL'] = self.openrouter_site_url
if self.openrouter_app_name:
os.environ['OR_APP_NAME'] = self.openrouter_app_name
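

if __name__ == '__main__':
    # Usage sketch (illustrative only; the key below is a placeholder).
    # Constructing the config also exports OR_SITE_URL / OR_APP_NAME via
    # model_post_init so litellm can pick them up.
    demo = LLMConfig(api_key=SecretStr('sk-placeholder'), num_retries=4)
    print(demo.model)  # default model name
    print(demo.api_key)  # SecretStr renders as '**********'
    print(os.environ['OR_APP_NAME'])  # set in model_post_init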