from huggingface_hub import InferenceClient, create_inference_endpoint

# Create the inference endpoint
endpoint = create_inference_endpoint(
    name="my-custom-endpoint",
    repository="path/to/your/model/repository",
    framework="custom",
    task="text-classification",
    accelerator="cpu",  # or "gpu" if needed
    instance_size="medium",
    instance_type="c6i",
    vendor="aws",  # required keyword argument: the cloud provider hosting the endpoint
    region="us-east-1",
    custom_image={
        "health_route": "/healthz",
        "port": 8080,
        "url": "your-docker-image-url:latest",
    },
)
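# Note: per the huggingface_hub docs, custom_image can also take an "env" key
# (a dict of environment variables passed to the container) if your image
# needs runtime configuration.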

# Wait for the endpoint to be ready
endpoint.wait()
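# wait() blocks until the endpoint reports "running"; it also accepts an
# optional timeout in seconds, e.g. endpoint.wait(timeout=300)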

# Create a client to interact with the endpoint
client = InferenceClient(endpoint.url)
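# Endpoints are created as "protected" by default, so pass your token here
# (InferenceClient(endpoint.url, token=...)) or use the preconfigured
# endpoint.client property instead of building a client by hand.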

# Test the endpoint
result = client.text_classification("This is a test input")
print(result)
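
# Optional cleanup: a paused endpoint stops incurring compute charges, and
# delete() removes it entirely
endpoint.pause()  # or endpoint.delete()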