modified the llama version

Files changed:
- app.py (+2 -2)
- requirements.txt (+0 -2)
app.py
CHANGED

@@ -42,7 +42,7 @@ h1 {
 """
 
 # Load the tokenizer and model with quantization
-model_id = "meta-llama/Meta-Llama-3
+model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
 bnb_config = BitsAndBytesConfig(
     load_in_4bit=True,
     bnb_4bit_use_double_quant=True,
@@ -241,7 +241,7 @@ def chat_llama3_8b(message: str, history: list, temperature: float, max_new_tokens
     responses = []
     count=0
     for chunk in chunks:
-        logger.info(f"Processing chunk {count+1}/{len(
+        logger.info(f"Processing chunk {count+1}/{len(chunks)}")
         response = generate_response_for_chunk(chunk, history, temperature, max_new_tokens)
         responses.append(response)
         count+=1
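For context, the first hunk pins the checkpoint fed to the quantized loader. Below is a minimal sketch of how those lines plausibly fit together in app.py; the diff stops after bnb_4bit_use_double_quant, so the remaining BitsAndBytesConfig fields and the from_pretrained call are assumptions, not lines from this commit.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"  # value set by this commit
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # from the diff
    bnb_4bit_use_double_quant=True,         # from the diff
    bnb_4bit_quant_type="nf4",              # assumed continuation
    bnb_4bit_compute_dtype=torch.bfloat16,  # assumed continuation
)

# Quantization is applied at load time by passing the config object.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
)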
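The second hunk touches the progress log inside the chunked-generation loop. Assuming chunks, history, temperature, max_new_tokens, logger, and generate_response_for_chunk as defined elsewhere in app.py, an equivalent formulation of the patched loop uses enumerate instead of the manual counter (the Space itself keeps count):

responses = []
for i, chunk in enumerate(chunks, start=1):
    # Same log line as the patched code; enumerate supplies the 1-based index.
    logger.info(f"Processing chunk {i}/{len(chunks)}")
    response = generate_response_for_chunk(chunk, history, temperature, max_new_tokens)
    responses.append(response)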
requirements.txt
CHANGED

@@ -2,5 +2,3 @@ accelerate
 transformers
 SentencePiece
 bitsandbytes
-torch
-transformers==4.43.1
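This drops the explicit torch entry and the duplicate transformers requirement: with both transformers and transformers==4.43.1 listed, the resolver was pinned to exactly 4.43.1 (the 4.43 line added Llama 3.1 support, presumably unnecessary once the Space targets Meta-Llama-3-8B-Instruct). torch is still installed transitively, since accelerate and bitsandbytes both depend on it. The resulting requirements.txt:

accelerate
transformers
SentencePiece
bitsandbytes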