modified the chunk limit and added error handling and caching of the model
app.py
CHANGED
@@ -6,6 +6,8 @@ import torch
 from threading import Thread
 import logging
 import spaces
+from functools import lru_cache
+
 # Set up logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
@@ -48,20 +50,31 @@ bnb_config = BitsAndBytesConfig(
     bnb_4bit_compute_dtype=torch.bfloat16
 )
 
+@lru_cache(maxsize=1)
+def load_model_and_tokenizer():
+    try:
+        start_time = time.time()
+        logger.info("Loading tokenizer...")
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        logger.info("Loading model...")
+        model = AutoModelForCausalLM.from_pretrained(
+            model_id,
+            device_map="auto",
+            quantization_config=bnb_config,
+            torch_dtype=torch.bfloat16
+        )
+        model.generation_config.pad_token_id = tokenizer.pad_token_id
+        end_time = time.time()
+        logger.info(f"Model and tokenizer loaded successfully in {end_time - start_time} seconds.")
+        return model, tokenizer
+    except Exception as e:
+        logger.error(f"Error loading model or tokenizer: {e}")
+        raise
+
 try:
-    logger.info("Loading tokenizer...")
-    tokenizer = AutoTokenizer.from_pretrained(model_id)
-    logger.info("Loading model...")
-    model = AutoModelForCausalLM.from_pretrained(
-        model_id,
-        device_map="auto",
-        quantization_config=bnb_config,
-        torch_dtype=torch.bfloat16
-    )
-    model.generation_config.pad_token_id = tokenizer.pad_token_id
-    logger.info("Model and tokenizer loaded successfully.")
+    model, tokenizer = load_model_and_tokenizer()
 except Exception as e:
-    logger.error(f"Error loading model or tokenizer: {e}")
+    logger.error(f"Failed to load model and tokenizer: {e}")
     raise
 
 terminators = [
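A note on the caching pattern above: functools.lru_cache memoizes a function by its arguments, so with a zero-argument loader and maxsize=1, every call after the first returns the same (model, tokenizer) pair instead of reloading the weights. A minimal sketch of that behavior (the print and object() stand-ins are illustrative, not from app.py):

from functools import lru_cache

@lru_cache(maxsize=1)
def load_model_and_tokenizer():
    print("loading...")          # runs only on the first call
    return object(), object()    # stand-ins for the real (model, tokenizer)

m1, t1 = load_model_and_tokenizer()
m2, t2 = load_model_and_tokenizer()  # served from the cache; no reload
assert m1 is m2 and t1 is t2

The new code also calls time.time(), and this hunk adds no import for it, so time is presumably imported in an earlier, unchanged part of app.py.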
@@ -76,7 +89,7 @@ Bad JSON example: {'lobby': { 'frcm': { 'replace': [ 'carpet', 'carpet_pad', 'ba
 Make sure to fetch details from the provided text and ignore unnecessary information. The response should be in JSON format only, without any additional comments.
 """
 
-def chunk_text(text, chunk_size=
+def chunk_text(text, chunk_size=5000):
     """
     Splits the input text into chunks of specified size.
 
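The body of chunk_text sits outside this hunk, so only the signature and docstring are visible. A minimal implementation consistent with that docstring and the new 5000-character default might look like this (a sketch, not the actual body):

def chunk_text(text, chunk_size=5000):
    """Split text into consecutive chunks of at most chunk_size characters."""
    return [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]

For example, chunk_text("a" * 12000) returns three chunks of lengths 5000, 5000, and 2000.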
@@ -185,7 +198,7 @@ with gr.Blocks(fill_height=True, css=css) as demo:
     additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
     additional_inputs=[
         gr.Slider(minimum=0, maximum=1, step=0.1, value=0.95, label="Temperature", render=False),
-        gr.Slider(minimum=128, maximum=
+        gr.Slider(minimum=128, maximum=2000, step=1, value=700, label="Max new tokens", render=False),
     ]
 )
 
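The new slider caps "Max new tokens" at 2000 with a default of 700. How that value reaches generation is outside this diff, but given the file's "from threading import Thread" and its terminators list, a typical Transformers streaming handler would look roughly like this (a sketch under those assumptions; respond and its parameter names are hypothetical, not from app.py):

from threading import Thread
from transformers import TextIteratorStreamer

def respond(prompt, temperature=0.95, max_new_tokens=700):
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = {
        **inputs,                          # input_ids and attention_mask
        "streamer": streamer,
        "max_new_tokens": max_new_tokens,  # the slider value, at most 2000
        "do_sample": True,
        "temperature": temperature,
        "eos_token_id": terminators,
    }
    Thread(target=model.generate, kwargs=generate_kwargs).start()  # generate off the UI thread
    partial = ""
    for chunk in streamer:                 # stream partial text back to Gradio
        partial += chunk
        yield partial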