Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
@@ -28,11 +28,10 @@ class OrcaChatBot:
|
|
28 |
self.model = model
|
29 |
self.tokenizer = tokenizer
|
30 |
self.system_message = system_message
|
31 |
-
self.conversation_history = None
|
32 |
|
33 |
def predict(self, user_message, temperature=0.4, max_new_tokens=70, top_p=0.99, repetition_penalty=1.9):
|
34 |
# Prepare the prompt
|
35 |
-
prompt = f"
|
36 |
|
37 |
# Encode the prompt
|
38 |
inputs = self.tokenizer(prompt, return_tensors='pt', add_special_tokens=False)
|
@@ -48,14 +47,10 @@ class OrcaChatBot:
|
|
48 |
pad_token_id=self.tokenizer.eos_token_id,
|
49 |
do_sample=True # Enable sampling-based generation
|
50 |
)
|
51 |
-
|
52 |
|
53 |
# Decode the generated response
|
54 |
response = self.tokenizer.decode(output_ids[0], skip_special_tokens=True)
|
55 |
|
56 |
-
# Update conversation history
|
57 |
-
self.conversation_history = self.tokenizer.decode(output_ids[0], skip_special_tokens=False)
|
58 |
-
|
59 |
return response
|
60 |
|
61 |
Orca_bot = OrcaChatBot(model, tokenizer)
|
|
|
28 |
self.model = model
|
29 |
self.tokenizer = tokenizer
|
30 |
self.system_message = system_message
|
|
|
31 |
|
32 |
def predict(self, user_message, temperature=0.4, max_new_tokens=70, top_p=0.99, repetition_penalty=1.9):
|
33 |
# Prepare the prompt
|
34 |
+
prompt = f"system\n{self.system_message}\nuser\n{user_message}\nassistant"
|
35 |
|
36 |
# Encode the prompt
|
37 |
inputs = self.tokenizer(prompt, return_tensors='pt', add_special_tokens=False)
|
|
|
47 |
pad_token_id=self.tokenizer.eos_token_id,
|
48 |
do_sample=True # Enable sampling-based generation
|
49 |
)
|
|
|
50 |
|
51 |
# Decode the generated response
|
52 |
response = self.tokenizer.decode(output_ids[0], skip_special_tokens=True)
|
53 |
|
|
|
|
|
|
|
54 |
return response
|
55 |
|
56 |
Orca_bot = OrcaChatBot(model, tokenizer)
|