Tobias Bergmann committed on
Commit 6ba0c05 · 1 Parent(s): 457d149
Files changed (1)
  1. app.py +7 -8
app.py CHANGED
@@ -17,16 +17,15 @@ model_path = hf_hub_download(
     repo_type="model"
 )
 # Load the GGUF model
-llm = Llama(model_path=model_path)
-
-# Setup the pipeline
-pipe = pipeline(
-    task="text-generation",
-    model=llm, # Passes the loaded Llama model as the model
-    max_new_tokens=MAX_MAX_NEW_TOKENS, # Sets the maximum number of tokens the model generates
+pipe = Llama(
+    n_ctx=MAX_MAX_NEW_TOKENS,
+    # n_threads=4, # Set the desired number of threads to use, defaults to number of cores
+    # n_gpu_layers = 1, # Enable to use GPU, check supported layers and GPU size.
+    # n_batch=1, # Set the batch size.
+    # use_mlock =True, # Set to False to disable locking to RAM.
+    model_path=model_path
 )
 
-
 # Setup the engine
 #pipe = Pipeline.create(
 #    task="text-generation",