|
from typing import Any, List |
|
|
|
from openai import AsyncOpenAI, OpenAI |
|
|
|
from ...utils.registry import registry |
|
from .base import EncoderBase |
|
|
|
|
|
@registry.register_encoder()
class OpenaiTextEmbeddingV3(EncoderBase):
    """Text encoder that fetches embeddings through the OpenAI client.

    A synchronous and an asynchronous client are created at construction
    time; `_infer` and `_ainfer` each send the input texts to the
    embeddings endpoint and return one vector per returned item.
    """

    # Model name sent with every embeddings request.
    model_id: str = "text-embedding-3-large"
    # Credential passed to both OpenAI clients.
    api_key: str
    # Declared embedding size.
    # NOTE(review): this value is not forwarded to the embeddings request in
    # this class, so changing it does not change the returned vector length
    # here -- confirm how EncoderBase uses it.
    dim: int = 3072

    class Config:
        """Configuration for this pydantic object."""

        # Empty tuple: no field-name prefixes are reserved, so a field named
        # `model_id` does not clash with pydantic's "model_" namespace.
        protected_namespaces = ()
        # Accept attributes that are not declared as fields (the clients
        # assigned in __init__ rely on this).
        extra = "allow"

    def __init__(self, /, **data: Any) -> None:
        """Validate the fields via the base class, then build both clients.

        Args:
            **data: Field values forwarded unchanged to the base initializer.
        """
        super().__init__(**data)
        # NOTE(review): `endpoint` is not declared in this class; presumably
        # it comes from EncoderBase or is supplied as an extra field -- verify.
        self.client = OpenAI(base_url=self.endpoint, api_key=self.api_key)
        self.aclient = AsyncOpenAI(base_url=self.endpoint, api_key=self.api_key)

    def _infer(self, data: List[str], **kwargs) -> List[List[float]]:
        """Embed `data` with the synchronous client.

        Args:
            data: Texts to embed.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            One embedding per item in the response, in response order.
            An empty `data` yields an empty list without contacting the API.
        """
        # The embeddings endpoint rejects an empty input array, so answer
        # locally instead of paying for a request that can only fail.
        if not data:
            return []
        res = self.client.embeddings.create(input=data, model=self.model_id)
        return [item.embedding for item in res.data]

    async def _ainfer(self, data: List[str], **kwargs) -> List[List[float]]:
        """Embed `data` with the asynchronous client.

        Args:
            data: Texts to embed.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            One embedding per item in the response, in response order.
            An empty `data` yields an empty list without contacting the API.
        """
        # Same empty-input short-circuit as the synchronous path.
        if not data:
            return []
        res = await self.aclient.embeddings.create(input=data, model=self.model_id)
        return [item.embedding for item in res.data]
|
|