fixed bug for llama-2 auth token handling
app_modules/llm_loader.py CHANGED (+14 -14)
@@ -356,20 +356,20 @@ class LLMLoader:
             model = MODEL_NAME_OR_PATH

             pipe = pipeline(
-                ...  # original argument list (14 lines) not shown in this view
+                task,
+                model=model,
+                tokenizer=tokenizer,
+                streamer=self.streamer,
+                return_full_text=return_full_text,  # langchain expects the full text
+                device=hf_pipeline_device_type,
+                torch_dtype=torch_dtype,
+                max_new_tokens=2048,
+                trust_remote_code=True,
+                temperature=temperature,
+                top_p=0.95,
+                top_k=0,  # select from top 0 tokens (because zero, relies on top_p)
+                repetition_penalty=1.115,
+            )

             self.llm = HuggingFacePipeline(pipeline=pipe, callbacks=callbacks)
         elif self.llm_model_type == "mosaicml":
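The hunk above only shows the rebuilt pipeline(...) call; the auth-token handling that the commit title refers to sits outside the visible lines. Below is a minimal sketch of how a loader like this might pass a Hugging Face access token when pulling the gated Llama-2 weights before wiring them into LangChain. The helper name load_llama2_pipeline, the HUGGINGFACE_AUTH_TOKEN environment variable, the default model id, and the streamer/device choices are illustrative assumptions, not the repository's actual code.

import os

import torch
from langchain.llms import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer, pipeline


def load_llama2_pipeline(
    model_name: str = "meta-llama/Llama-2-7b-chat-hf",  # hypothetical default
    task: str = "text-generation",
    device: int = 0,  # assumes a CUDA GPU; use -1 (CPU) with a float32 dtype otherwise
    temperature: float = 0.01,
    callbacks=None,
):
    # Gated Llama-2 checkpoints require an access token that has been accepted
    # on the meta-llama model page; the variable name is an assumption here.
    token = os.environ.get("HUGGINGFACE_AUTH_TOKEN")

    tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=token)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        use_auth_token=token,
        torch_dtype=torch.float16,
        trust_remote_code=True,
    )
    streamer = TextStreamer(tokenizer, skip_prompt=True)

    # Same generation arguments as the pipeline(...) call in the diff, but with
    # the model/tokenizer pre-loaded so the token is only needed at download time.
    pipe = pipeline(
        task,
        model=model,
        tokenizer=tokenizer,
        streamer=streamer,
        return_full_text=True,  # langchain expects the full text
        device=device,
        max_new_tokens=2048,
        temperature=temperature,
        top_p=0.95,
        top_k=0,  # rely on top_p rather than top_k
        repetition_penalty=1.115,
    )
    return HuggingFacePipeline(pipeline=pipe, callbacks=callbacks)

With something along these lines, the Space only needs the token set as a secret/environment variable; everything downstream keeps using the returned HuggingFacePipeline instance exactly as self.llm is used in the diff.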