Getting "ValueError: Unsupported model type qwen2_vl" while deploying OS-Atlas-Base-7B

#6
by Saurabh-hiehq

import boto3
import sagemaker
from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri

# Explicitly set the region
session = boto3.Session(
    aws_access_key_id="your id",
    aws_secret_access_key="your key",
    region_name="us-east-1",  # Change region as needed
)
sagemaker_session = sagemaker.Session(boto_session=session)

# Check the region explicitly
print(f"Region: {sagemaker_session.boto_session.region_name}")

# Use this session in the Hugging Face deployment
role = "your role"
hub = {
    "HF_MODEL_ID": "OS-Copilot/OS-Atlas-Base-7B",
    "SM_NUM_GPUS": "1",
    "TRUST_REMOTE_CODE": "true",
}

huggingface_model = HuggingFaceModel(
    role=role,
    env=hub,
    sagemaker_session=sagemaker_session,
    image_uri=get_huggingface_llm_image_uri(
        backend="huggingface",   # TGI container
        region="us-east-1",      # Explicitly set the region here
        version="2.3.1",
    ),
)

try:
    predictor = huggingface_model.deploy(
        initial_instance_count=1,
        instance_type="ml.g5.4xlarge",
        container_startup_health_check_timeout=300,
    )
    response = predictor.predict({"inputs": "Hi, what can you help me with?"})
    print("model_response::::", response)
except Exception as e:
    # Note: if deploy() fails, `predictor` is never bound, so any later
    # predict() call outside this try block would raise a NameError.
    print(f"Deployment failed: {e}")

########
I'm using the code above to create an inference endpoint, but the deployment fails with the ValueError in the title.
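What I suspect so far: OS-Atlas-Base-7B is a Qwen2-VL checkpoint, and as far as I can tell from the text-generation-inference release notes, the qwen2_vl architecture is not known to the 2.3.x images, so the version="2.3.1" container pinned above rejects the model type. A minimal sketch of what I plan to try, assuming a newer TGI DLC version is actually available through my sagemaker SDK and region (the exact version string below is an assumption, not something I have confirmed):

# Hypothetical fix: request a TGI image recent enough to know qwen2_vl.
# "3.0.1" is an assumption; verify it against the versions your sagemaker
# SDK exposes and the TGI supported-models list before relying on it.
image_uri = get_huggingface_llm_image_uri(
    backend="huggingface",
    region="us-east-1",
    version="3.0.1",
)
print(image_uri)  # check which huggingface-pytorch-tgi-inference image this resolves to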
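Separately, since this is a vision-language model, a plain text "inputs" payload only exercises half of it. If the endpoint does come up, my understanding is that the TGI DLC can expose an OpenAI-style Messages API when MESSAGES_API_ENABLED is set to "true" in the hub env, which would let me pass an image URL. A hedged sketch (the URL and prompt are placeholders):

# Assumes MESSAGES_API_ENABLED="true" was added to `hub` before deploying,
# so /invocations accepts chat-completions-style payloads.
payload = {
    "messages": [
        {
            "role": "user",
            "content": [
                {"type": "image_url",
                 "image_url": {"url": "https://example.com/screenshot.png"}},
                {"type": "text",
                 "text": "Describe the UI elements in this screenshot."},
            ],
        }
    ],
    "max_tokens": 256,
}
print(predictor.predict(payload))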
