Inference Providers documentation

HF Inference

Hugging Face's logo
Join the Hugging Face community

and get access to the augmented documentation experience

to get started

HF Inference

HF Inference is the serverless Inference API powered by Hugging Face. This service used to be called “Inference API (serverless)” prior to Inference Providers. If you are interested in deploying models to a dedicated and autoscaling infrastructure managed by Hugging Face, check out Inference Endpoints instead.

Supported tasks

Audio Classification

Find out more about Audio Classification here.

from huggingface_hub import InferenceClient

# Route requests through the serverless HF Inference provider.
client = InferenceClient(
    api_key="hf_xxxxxxxxxxxxxxxxxxxxxxxx",
    provider="hf-inference",
)

# Classify the emotion expressed in a local audio file.
model_id = "ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition"
output = client.audio_classification("sample1.flac", model=model_id)

Automatic Speech Recognition

Find out more about Automatic Speech Recognition here.

from huggingface_hub import InferenceClient

# Serverless HF Inference provider client.
client = InferenceClient(
    api_key="hf_xxxxxxxxxxxxxxxxxxxxxxxx",
    provider="hf-inference",
)

# Transcribe a local audio file with Whisper.
asr_model = "openai/whisper-large-v3-turbo"
output = client.automatic_speech_recognition("sample1.flac", model=asr_model)

Chat Completion (LLM)

Find out more about Chat Completion (LLM) here.

from huggingface_hub import InferenceClient

client = InferenceClient(
    api_key="hf_xxxxxxxxxxxxxxxxxxxxxxxx",
    provider="hf-inference",
)

# A conversation is a list of role/content messages (OpenAI-compatible format).
messages = [{"role": "user", "content": "What is the capital of France?"}]

completion = client.chat.completions.create(
    model="Qwen/QwQ-32B",
    messages=messages,
    max_tokens=500,
)

# The assistant's reply is the first choice's message.
print(completion.choices[0].message)

Chat Completion (VLM)

Find out more about Chat Completion (VLM) here.

from huggingface_hub import InferenceClient

client = InferenceClient(
    api_key="hf_xxxxxxxxxxxxxxxxxxxxxxxx",
    provider="hf-inference",
)

# Multimodal user turn: a text part plus an image part referenced by URL.
image_url = "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
user_content = [
    {"type": "text", "text": "Describe this image in one sentence."},
    {"type": "image_url", "image_url": {"url": image_url}},
]

completion = client.chat.completions.create(
    model="google/gemma-3-27b-it",
    messages=[{"role": "user", "content": user_content}],
    max_tokens=500,
)

# Print the model's description of the image.
print(completion.choices[0].message)

Feature Extraction

Find out more about Feature Extraction here.

from huggingface_hub import InferenceClient

client = InferenceClient(
    api_key="hf_xxxxxxxxxxxxxxxxxxxxxxxx",
    provider="hf-inference",
)

# Embed a sentence with a multilingual embedding model.
sentence = "Today is a sunny day and I will get some ice cream."
result = client.feature_extraction(
    inputs=sentence,
    model="intfloat/multilingual-e5-large-instruct",
)

Fill Mask

Find out more about Fill Mask here.

from huggingface_hub import InferenceClient

client = InferenceClient(
    provider="hf-inference",
    api_key="hf_xxxxxxxxxxxxxxxxxxxxxxxx",
)

# Fill-mask requires the model's mask token in the input; for BERT-family
# models that token is "[MASK]". The previous example text contained no mask
# token, so the request would be rejected by the task endpoint.
result = client.fill_mask(
    inputs="The answer to the universe is [MASK].",
    model="google-bert/bert-base-uncased",
)

Image Classification

Find out more about Image Classification here.

from huggingface_hub import InferenceClient

# Serverless HF Inference provider client.
client = InferenceClient(
    api_key="hf_xxxxxxxxxxxxxxxxxxxxxxxx",
    provider="hf-inference",
)

# Classify a local image file.
classifier_model = "Falconsai/nsfw_image_detection"
output = client.image_classification("cats.jpg", model=classifier_model)

Image To Image

Find out more about Image To Image here.

from huggingface_hub import InferenceClient

client = InferenceClient(
    api_key="hf_xxxxxxxxxxxxxxxxxxxxxxxx",
    provider="hf-inference",
)

# Transform a source image guided by a text prompt.
# The returned value is a PIL.Image object.
image = client.image_to_image(
    "cat.png",
    prompt="Turn the cat into a tiger.",
    model="enhanceaiteam/Flux-Uncensored-V2",
)

Object Detection

Find out more about Object Detection here.

from huggingface_hub import InferenceClient

# Serverless HF Inference provider client.
client = InferenceClient(
    api_key="hf_xxxxxxxxxxxxxxxxxxxxxxxx",
    provider="hf-inference",
)

# Detect objects (bounding boxes + labels) in a local image.
detector = "facebook/detr-resnet-50"
output = client.object_detection("cats.jpg", model=detector)

Question Answering

Find out more about Question Answering here.

from huggingface_hub import InferenceClient

client = InferenceClient(
    api_key="hf_xxxxxxxxxxxxxxxxxxxxxxxx",
    provider="hf-inference",
)

# Extractive QA: the model pulls the answer span out of the given context.
qa_payload = {
    "question": "What is my name?",
    "context": "My name is Clara and I live in Berkeley.",
}

result = client.question_answering(
    inputs=qa_payload,
    model="deepset/gelectra-large-germanquad",
)

Summarization

Find out more about Summarization here.

from huggingface_hub import InferenceClient

client = InferenceClient(
    api_key="hf_xxxxxxxxxxxxxxxxxxxxxxxx",
    provider="hf-inference",
)

# Summarize a long passage into a short abstract.
article = "The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct."
result = client.summarization(
    inputs=article,
    model="facebook/bart-large-cnn",
)

Text Classification

Find out more about Text Classification here.

from huggingface_hub import InferenceClient

client = InferenceClient(
    api_key="hf_xxxxxxxxxxxxxxxxxxxxxxxx",
    provider="hf-inference",
)

# Classify the sentiment/label of a short text.
result = client.text_classification(
    inputs="I like you. I love you",
    model="ProsusAI/finbert",
)

Text Generation

Find out more about Text Generation here.

from huggingface_hub import InferenceClient

client = InferenceClient(
    provider="hf-inference",
    api_key="hf_xxxxxxxxxxxxxxxxxxxxxxxx",
)

# `messages` must be a list of role/content dicts (OpenAI-compatible chat
# format); the previous example passed a plain string, which the chat
# completions endpoint rejects.
completion = client.chat.completions.create(
    model="Qwen/QwQ-32B",
    messages=[
        {
            "role": "user",
            "content": "Can you please let us know more details about your ",
        }
    ],
    max_tokens=500,
)

print(completion.choices[0].message)

Text To Image

Find out more about Text To Image here.

from huggingface_hub import InferenceClient

client = InferenceClient(
    api_key="hf_xxxxxxxxxxxxxxxxxxxxxxxx",
    provider="hf-inference",
)

# Generate an image from a text prompt.
# The returned value is a PIL.Image object.
image = client.text_to_image(
    "Astronaut riding a horse",
    model="black-forest-labs/FLUX.1-dev",
)

Text To Video

Find out more about Text To Video here.

from huggingface_hub import InferenceClient

client = InferenceClient(
    api_key="hf_xxxxxxxxxxxxxxxxxxxxxxxx",
    provider="hf-inference",
)

# Generate a short video clip from a text prompt.
t2v_model = "AdamLucek/Wan2.1-T2V-14B-OldBookIllustrations"
video = client.text_to_video(
    "A young man walking on the street",
    model=t2v_model,
)

Token Classification

Find out more about Token Classification here.

from huggingface_hub import InferenceClient

client = InferenceClient(
    api_key="hf_xxxxxxxxxxxxxxxxxxxxxxxx",
    provider="hf-inference",
)

# Named-entity recognition: label each token span in the sentence.
text = "My name is Sarah Jessica Parker but you can call me Jessica"
result = client.token_classification(
    inputs=text,
    model="dbmdz/bert-large-cased-finetuned-conll03-english",
)

Translation

Find out more about Translation here.

from huggingface_hub import InferenceClient

client = InferenceClient(
    api_key="hf_xxxxxxxxxxxxxxxxxxxxxxxx",
    provider="hf-inference",
)

# Translate a Russian sentence with a multilingual NLLB model.
source_text = "Меня зовут Вольфганг и я живу в Берлине"
result = client.translation(
    inputs=source_text,
    model="facebook/nllb-200-distilled-600M",
)

Zero Shot Classification

Find out more about Zero Shot Classification here.

import requests

API_URL = "https://router.huggingface.co/hf-inference/models/facebook/bart-large-mnli"
headers = {"Authorization": "Bearer hf_xxxxxxxxxxxxxxxxxxxxxxxx"}

def query(payload):
    """POST `payload` to the HF router and return the decoded JSON response.

    Raises requests.HTTPError on a non-2xx status instead of silently
    JSON-decoding an error body, and bounds the request with a timeout
    so a stalled connection cannot hang forever.
    """
    response = requests.post(API_URL, headers=headers, json=payload, timeout=30)
    response.raise_for_status()
    return response.json()

output = query({
    "inputs": "Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!",
    "parameters": {"candidate_labels": ["refund", "legal", "faq"]},
})
< > Update on GitHub