# helem-llm / endpoint.py
from huggingface_hub import InferenceClient, create_inference_endpoint
# Create the inference endpoint. Note that `vendor` is a required argument
# of create_inference_endpoint; valid instance_size/instance_type values
# depend on the vendor's current catalog.
endpoint = create_inference_endpoint(
    name="my-custom-endpoint",
    repository="path/to/your/model/repository",
    framework="custom",
    task="text-classification",
    accelerator="cpu",  # or "gpu" if needed
    vendor="aws",  # cloud provider; must match the chosen region and instance type
    region="us-east-1",
    instance_size="medium",
    instance_type="c6i",
    custom_image={
        "health_route": "/healthz",
        "port": 8080,
        "url": "your-docker-image-url:latest",
    },
)
# Wait for the endpoint to be ready
endpoint.wait()
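# Alternative (a sketch, not from the original script): instead of blocking
# on wait(), the endpoint can be polled manually; fetch() refreshes the
# object in place and status reports the deployment state.
# endpoint.fetch()
# print(endpoint.status)  # e.g. "running" once deployment finishes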
# Create a client to interact with the endpoint
client = InferenceClient(endpoint.url)
# Test the endpoint
result = client.text_classification("This is a test input")
print(result)
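# Optional cleanup (a sketch, not part of the original script): a running
# endpoint accrues charges, so pause or delete it once testing is done.
# endpoint.pause()   # suspend compute while keeping the configuration
# endpoint.delete()  # or remove the endpoint entirely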