Daniel Marques committed
Commit c77782f
1 Parent(s): cb776ef

feat: add history

Files changed (3)
  1. constants.py +2 -3
  2. main.py +1 -1
  3. run_localGPT.py +4 -2
constants.py CHANGED
@@ -35,15 +35,14 @@ MAX_NEW_TOKENS = CONTEXT_WINDOW_SIZE # int(CONTEXT_WINDOW_SIZE/4)
 
 #### If you get a "not enough space in the buffer" error, you should reduce the values below, start with half of the original values and keep halving the value until the error stops appearing
 
-N_GPU_LAYERS = 40 # Llama-2-70B has 83 layers
-N_BATCH = 1024
+N_GPU_LAYERS = 50 # Llama-2-70B has 83 layers
+N_BATCH = 2048
 
 ### From experimenting with the Llama-2-7B-Chat-GGML model on 8GB VRAM, these values work:
 # N_GPU_LAYERS = 20
 # N_BATCH = 512
 
 
-
 # https://python.langchain.com/en/latest/_modules/langchain/document_loaders/excel.html#UnstructuredExcelLoader
 DOCUMENT_MAP = {
     ".txt": TextLoader,
main.py CHANGED
@@ -62,7 +62,7 @@ Question: {question}
 
 memory = ConversationBufferMemory(input_key="question", memory_key="history")
 
-QA_CHAIN_PROMPT = PromptTemplate.from_template(input_variables=["history", "context", "question"], template=template)
+QA_CHAIN_PROMPT = PromptTemplate(input_variables=["history", "context", "question"], template=template)
 
 QA = RetrievalQA.from_chain_type(
     llm=LLM,
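The switch to the `PromptTemplate(...)` constructor fixes the call signature: `from_template` expects just the template string and infers the variables itself, so passing `input_variables` through it is at best redundant and can error, while the constructor takes both explicitly. Together with a `{history}` slot in the template and a `ConversationBufferMemory` keyed on it, the QA chain keeps conversational history between questions. A rough sketch of that wiring, with a hypothetical `build_qa_chain` helper standing in for the surrounding app code that supplies the LLM and retriever:

```python
# Rough sketch (not copied from main.py): a {history} slot plus
# ConversationBufferMemory gives the RetrievalQA chain memory between questions.
from langchain.chains import RetrievalQA
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate

template = """Use the context and the chat history to answer the question.

Context: {context}
History: {history}
Question: {question}
Answer:"""


def build_qa_chain(llm, retriever):
    """Hypothetical helper: the llm and retriever are built elsewhere in the app."""
    prompt = PromptTemplate(input_variables=["history", "context", "question"], template=template)
    memory = ConversationBufferMemory(input_key="question", memory_key="history")
    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
        # prompt and memory are injected into the underlying stuff-documents chain
        chain_type_kwargs={"prompt": prompt, "memory": memory},
    )
```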
run_localGPT.py CHANGED
@@ -79,7 +79,7 @@ def load_model(device_type, model_id, model_basename=None, LOGGING=logging):
 
     # Create a pipeline for text generation
 
-    streamer = TextStreamer(tokenizer)
+    streamer = TextStreamer(tokenizer, skip_prompt=True)
 
     pipe = pipeline(
         "text-generation",
@@ -91,7 +91,9 @@ def load_model(device_type, model_id, model_basename=None, LOGGING=logging):
         top_k=40,
         repetition_penalty=1.0,
         generation_config=generation_config,
-        streamer=streamer
+        streamer=streamer,
+        num_return_sequences=1,
+        eos_token_id=tokenizer.eos_token_id
     )
 
     local_llm = HuggingFacePipeline(pipeline=pipe)
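Two behavioural tweaks here: `skip_prompt=True` makes the `TextStreamer` print only newly generated tokens instead of echoing the whole prompt back to the console, and passing `eos_token_id=tokenizer.eos_token_id` makes the stopping token explicit. A standalone sketch of the same pipeline wiring, using a small hypothetical model id so it runs outside the app:

```python
# Standalone sketch (not the repo's load_model): streaming text generation with
# a HF pipeline, printing only new tokens and stopping at the EOS token.
from langchain.llms import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer, pipeline

model_id = "gpt2"  # hypothetical small model, just to show the wiring
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

# skip_prompt=True: stream only the completion, not the echoed prompt
streamer = TextStreamer(tokenizer, skip_prompt=True)

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=64,
    repetition_penalty=1.0,
    streamer=streamer,                    # tokens print to stdout as they are generated
    num_return_sequences=1,               # one completion per prompt (the default, made explicit)
    eos_token_id=tokenizer.eos_token_id,  # stop at end-of-sequence
)

local_llm = HuggingFacePipeline(pipeline=pipe)
local_llm("Question: What is retrieval-augmented generation?\nAnswer:")
```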