arjunanand13 committed
Commit 0501f59
1 Parent(s): c277bbe

Update app.py

Files changed (1)
app.py  +1 -1
app.py CHANGED
@@ -27,7 +27,7 @@ from huggingface_hub import InferenceClient
 Loading of the LLama3 model
 """
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
-model_id = 'meta-llama/Meta-Llama-3-8B-Instruct'
+model_id = 'meta-llama/Meta-Llama-3-8B'
 device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'
 
 # set quantization configuration to load large model with less GPU memory
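
For context, the hunk ends just before the quantization setup it mentions. The sketch below is not the repository's exact code (the quantization block is outside this diff); it only illustrates how the updated model_id and a 4-bit BitsAndBytesConfig are typically combined with transformers and bitsandbytes. The variable names (bnb_config, tokenizer, model) are illustrative assumptions.

# Minimal sketch, assuming transformers + bitsandbytes are installed.
import os

import torch
from torch import cuda
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

HF_TOKEN = os.environ.get("HF_TOKEN", None)
model_id = 'meta-llama/Meta-Llama-3-8B'
device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

# 4-bit NF4 quantization to load the 8B model with less GPU memory,
# matching the intent of the comment in the diff above.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_compute_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained(model_id, token=HF_TOKEN)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map='auto',
    token=HF_TOKEN,
)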