webhook-space / models.py
plaggy's picture
refactor
8b7a023
import json
import os
from pydantic import BaseModel
from typing import Literal
class EnvConfig(BaseModel):
    """Settings whose defaults are taken from environment variables.

    Every default is looked up once, when this class body is executed. A
    variable that is not set in the environment yields ``None`` as the
    default, even though the fields are annotated ``str``.
    """

    # Your token from Settings.
    hf_token: str = os.environ.get("HF_TOKEN")

    # Name of the TEI endpoint.
    tei_name: str = os.environ.get("TEI_NAME")

    # Name of the chunked dataset.
    chunked_ds_name: str = os.environ.get("CHUNKED_DS_NAME")

    # Name of the embeddings dataset.
    embed_ds_name: str = os.environ.get("EMBED_DS_NAME")

    # Splits of the input dataset to process, comma separated.
    # NOTE(review): the module-level code at the bottom of this file replaces
    # this value with a list of split names on the shared instance.
    input_splits: str = os.environ.get("INPUT_SPLITS")

    # Name of the column to load from the input dataset.
    input_text_col: str = os.environ.get("INPUT_TEXT_COL")
class ChunkConfig(BaseModel):
    """Chunking options; this module validates one from ``configs/chunk_config.json``."""

    # Chunking strategy; only these three values pass validation.
    strategy: Literal["recursive", "sequence", "constant"]

    # presumably the separator sequence used when splitting text - TODO confirm
    split_seq: str

    # presumably the target chunk length; units are not visible here - TODO confirm
    chunk_len: int

    # presumably whether the resulting dataset repo is private - TODO confirm
    private: bool
class EmbedConfig(BaseModel):
    """Embedding options; this module validates one from ``configs/embed_config.json``."""

    # presumably whether the resulting embeddings dataset repo is private - TODO confirm
    private: bool

    # presumably the limit on concurrent embedding requests (a semaphore size)
    # - TODO confirm against the code that consumes it
    semaphore_bound: int
class WebhookPayloadEvent(BaseModel):
    """The ``event`` object of an incoming webhook payload."""

    # What happened. "move" is included because the Hugging Face webhook
    # documentation lists it alongside create/update/delete; without it a
    # move event would be rejected during validation.
    action: Literal["create", "update", "delete", "move"]

    # What the action applies to; kept as a free-form string so that new
    # scope values do not fail validation.
    scope: str
class WebhookPayloadRepo(BaseModel):
    """The ``repo`` object of an incoming webhook payload.

    Field names mirror the payload's JSON keys, which is why ``type``, ``id``
    and the camel-cased ``headSha`` are kept as they are.
    """

    # Kind of repository; only these three values pass validation.
    type: Literal["dataset", "model", "space"]

    # Repository name as sent in the payload.
    name: str

    # Repository identifier as sent in the payload.
    id: str

    # Visibility flag of the repository.
    private: bool

    # presumably the SHA of the repository's head commit.
    # NOTE(review): required here - confirm every payload this app receives
    # actually carries it.
    headSha: str
class WebhookPayload(BaseModel):
    """Top-level webhook payload: the event description plus the repo it concerns."""

    # Which action occurred and at what scope.
    event: WebhookPayloadEvent

    # The repository the event refers to.
    repo: WebhookPayloadRepo
def _load_config(model_cls, relative_path):
    """Read a JSON file under the current working directory and validate it as *model_cls*."""
    config_path = os.path.join(os.getcwd(), relative_path)
    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(config_path, encoding="utf-8") as config_file:
        # Validate the raw text directly rather than parsing it, re-serialising
        # it and parsing it a second time.
        return model_cls.model_validate_json(config_file.read())


# Module-level configuration objects, built once at import time.
chunk_config = _load_config(ChunkConfig, "configs/chunk_config.json")
embed_config = _load_config(EmbedConfig, "configs/embed_config.json")

env_config = EnvConfig()

# INPUT_SPLITS is a single comma-separated string, or None when the variable
# is unset. Turn it into a list of split names: an unset variable means no
# splits, and entries are dropped only after stripping so that whitespace-only
# items (as in "a, ,b") do not survive as empty strings.
_raw_splits = env_config.input_splits or ""
env_config.input_splits = [
    split_name
    for split_name in (piece.strip() for piece in _raw_splits.split(","))
    if split_name
]