convosim-ui / models /databricks /custom_databricks_llm.py
ivnban27-ctl's picture
llm_endpoint_update (#11)
1e91476 verified
raw
history blame
2.49 kB
from typing import Any, Dict, Iterator, List, Mapping, Optional
from models.business_logic_utils.business_logic import process_app_request
from langchain_core.callbacks.manager import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM
from langchain_core.outputs import GenerationChunk
class CustomDatabricksLLM(LLM):
    """LangChain ``LLM`` wrapper around a Databricks model-serving endpoint.

    Each call builds a batched JSON payload from the instance's generation
    settings and sends it through ``process_app_request`` to
    ``endpoint_url``, authenticating with ``bearer_token``.
    """

    # Endpoint connection settings.
    endpoint_url: str
    bearer_token: str
    # Generation parameters forwarded with every request.
    issue: str
    language: str
    temperature: float
    texter_name: str = ""

    def generate_databricks_request(self, prompt: str) -> Dict[str, Any]:
        """Build the JSON payload expected by the serving endpoint.

        Every value is wrapped in a one-element list because the endpoint
        consumes batched inputs.
        """
        inputs: Dict[str, Any] = {
            "conversation_id": [""],
            "prompt": [prompt],
            "issue": [self.issue],
            "language": [self.language],
            "temperature": [self.temperature],
            "max_tokens": [128],
            "texter_name": [self.texter_name],
        }
        return {"inputs": inputs}

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Send ``prompt`` to the endpoint and return the generated text.

        NOTE(review): ``stop``, ``run_manager`` and ``kwargs`` are accepted
        for LangChain interface compatibility but are not forwarded to the
        endpoint.
        """
        payload = self.generate_databricks_request(prompt)
        response = process_app_request(payload, self.endpoint_url, self.bearer_token)
        return response['predictions'][0]['generated_text']

    def _stream(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> Iterator[GenerationChunk]:
        """Pseudo-stream the completion one character at a time.

        The endpoint returns the full completion in a single response, so
        this fetches it via :meth:`_call` and re-emits it char by char,
        notifying ``run_manager`` of each token when one is provided.
        """
        completion = self._call(prompt, stop, run_manager, **kwargs)
        for character in completion:
            chunk = GenerationChunk(text=character)
            if run_manager is not None:
                run_manager.on_llm_new_token(chunk.text, chunk=chunk)
            yield chunk

    @property
    def _identifying_params(self) -> Dict[str, Any]:
        """Return a dictionary of identifying parameters."""
        # The model name lets monitoring tools (e.g. LangSmith) apply custom
        # per-model token counting / pricing rules to calls from this LLM.
        return {"model_name": "CustomChatModel"}

    @property
    def _llm_type(self) -> str:
        """Get the type of language model used by this chat model. Used for logging purposes only."""
        return "custom"