While deploying OS-Atlas-Base-7B, I am getting a "ValueError: Unsupported model type qwen2_vl" error.
import boto3
import sagemaker
from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri

# Explicitly set the region.
# SECURITY: do not hard-code AWS keys in source -- prefer environment
# variables, an AWS profile, or an instance/role credential provider.
session = boto3.Session(aws_access_key_id="your id",
                        aws_secret_access_key="your key",
                        region_name="us-east-1")  # Change region as needed
sagemaker_session = sagemaker.Session(boto_session=session)

# Check the region explicitly.
print(f"Region: {sagemaker_session.boto_session.region_name}")

# Use this session in your Hugging Face deployment.
role = "your role"
hub = {'HF_MODEL_ID': 'OS-Copilot/OS-Atlas-Base-7B', 'SM_NUM_GPUS': '1', 'TRUST_REMOTE_CODE': 'true'}

# NOTE(review): "ValueError: Unsupported model type qwen2_vl" comes from the
# TGI serving container, not from your code -- the model architecture is not
# compiled into that image version.  TGI 2.3.1 predates Qwen2-VL support;
# request a newer LLM image version (Qwen2-VL landed in the TGI 3.x line --
# confirm the latest version string supported by your SDK).
# TRUST_REMOTE_CODE cannot work around a missing architecture in TGI.
huggingface_model = HuggingFaceModel(
    role=role,
    env=hub,
    sagemaker_session=sagemaker_session,
    image_uri=get_huggingface_llm_image_uri(
        backend="huggingface",
        region="us-east-1",  # Explicitly set the region here
        version="3.0.1",     # was "2.3.1" -- too old for qwen2_vl
    ),
)

predictor = None
try:
    predictor = huggingface_model.deploy(
        initial_instance_count=1,
        instance_type="ml.g5.4xlarge",
        container_startup_health_check_timeout=300,
    )
    response = predictor.predict({"inputs": "Hi, what can you help me with?"})
    print("model_response::::", response)
except Exception as e:
    print(f"Deployment failed: {e}")
    # BUG in the original: calling predictor.predict() unconditionally here
    # raised NameError when deploy() itself failed (predictor was never
    # bound), masking the real deployment error.  Only retry the call when
    # the endpoint actually exists.
    if predictor is not None:
        print(predictor.predict({"inputs": "Hi, what can you help me with?"}))
########
I am using the above code to create an inference endpoint, but I get the error shown above.