Text Generation
Transformers
Safetensors
Japanese
English
llama
conversational
text-generation-inference
Inference Endpoints
Yuto-24 committed on
Commit
84ccb33
1 Parent(s): 20246bd

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +2 -0
README.md CHANGED
@@ -93,8 +93,10 @@ def retrieve(input_text):
93
 
94
  class CallLLM:
95
  def __init__(self, model_name_or_path: str) -> None:
 
96
  self.model = AutoModelForCausalLM.from_pretrained(
97
  model_name_or_path,
 
98
  trust_remote_code=True,
99
  device_map="auto",
100
  ).eval()
 
93
 
94
  class CallLLM:
95
  def __init__(self, model_name_or_path: str) -> None:
96
+ self.quantization_config = BitsAndBytesConfig(load_in_8bit=True)
97
  self.model = AutoModelForCausalLM.from_pretrained(
98
  model_name_or_path,
99
+ quantization_config=self.quantization_config,
100
  trust_remote_code=True,
101
  device_map="auto",
102
  ).eval()