Yuto-24
/

llm-jp-3-13B-Tengentoppa_magpie

Text Generation

text-generation-inference

Inference Endpoints

Model card Files Files and versions Community

Yuto-24 committed 20 days ago

Commit

84ccb33

•

1 Parent(s): 20246bd

Update README.md

Files changed (1) hide show

README.md +2 -0

README.md CHANGED Viewed

@@ -93,8 +93,10 @@ def retrieve(input_text):
 class CallLLM:
     def __init__(self, model_name_or_path: str) -> None:
         self.model = AutoModelForCausalLM.from_pretrained(
             model_name_or_path,
             trust_remote_code=True,
             device_map="auto",
         ).eval()

 class CallLLM:
     def __init__(self, model_name_or_path: str) -> None:
+        self.quantization_config = BitsAndBytesConfig(load_in_8bit=True)
         self.model = AutoModelForCausalLM.from_pretrained(
             model_name_or_path,
+            quantization_config=self.quantization_config,
             trust_remote_code=True,
             device_map="auto",
         ).eval()