Tobias Bergmann committed on
Commit 6ba0c05 · 1 Parent(s): 457d149
Files changed (1)
  1. app.py +7 -8
app.py CHANGED
@@ -17,16 +17,15 @@ model_path = hf_hub_download(
     repo_type="model"
 )
 # Load the GGUF model
-llm = Llama(model_path=model_path)
-
-# Setup the pipeline
-pipe = pipeline(
-    task="text-generation",
-    model=llm, # Passes the loaded Llama model as the model
-    max_new_tokens=MAX_MAX_NEW_TOKENS, # Sets the maximum number of tokens the model generates
+pipe = Llama(
+    n_ctx=MAX_MAX_NEW_TOKENS,
+    # n_threads=4, # Set the desired number of threads to use, defaults to number of cores
+    # n_gpu_layers = 1, # Enable to use GPU, check supported layers and GPU size.
+    # n_batch=1, # Set the batch size.
+    # use_mlock =True, # Set to False to disable locking to RAM.
+    model_path=model_path
 )
 
-
 # Setup the engine
 #pipe = Pipeline.create(
 #    task="text-generation",