main.py CHANGED
@@ -33,13 +33,20 @@ def format_prompt(current_prompt, history):
 
 def generate_stream(item: Item) -> Generator[bytes, None, None]:
     formatted_prompt = format_prompt(f"{item.system_prompt}, {item.prompt}", item.history)
+    # Estimate token count for the formatted_prompt
+    input_token_count = len(formatted_prompt.split())  # Simple whitespace tokenization, adjust if necessary
+
+    # Ensure total token count doesn't exceed the maximum limit
+    max_tokens_allowed = 32768
+    max_new_tokens_adjusted = max(1, min(item.max_new_tokens, max_tokens_allowed - input_token_count))
+
     generate_kwargs = {
         "temperature": item.temperature,
-        "max_new_tokens": item.max_new_tokens,
+        "max_new_tokens": max_new_tokens_adjusted,
         "top_p": item.top_p,
         "repetition_penalty": item.repetition_penalty,
         "do_sample": True,
-        "seed": 42,
+        "seed": 42,
     }
 
     # Stream the response from the InferenceClient
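The change's own comment flags the whitespace split as a rough estimate: subword tokenizers typically produce more tokens than words, so a prompt near the 32768-token limit could still overflow. A minimal sketch of a tokenizer-based count, assuming the transformers library is installed and using a placeholder model id (substitute whichever model this Space actually serves); this is an illustration, not part of the committed change:

    # Sketch only: count prompt tokens with the model's own tokenizer instead of str.split().
    # The model id below is a placeholder assumption, not taken from this repository.
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("mistralai/Mixtral-8x7B-Instruct-v0.1")

    def count_input_tokens(formatted_prompt: str) -> int:
        # Tokenize without special tokens so the count reflects the raw prompt text.
        return len(tokenizer.encode(formatted_prompt, add_special_tokens=False))

    def adjust_max_new_tokens(formatted_prompt: str, requested: int, limit: int = 32768) -> int:
        # Clamp the requested generation budget to the room left in the context window,
        # mirroring the max(1, min(...)) logic used in the diff above.
        return max(1, min(requested, limit - count_input_tokens(formatted_prompt)))

Loading a tokenizer adds startup cost, but it only needs to happen once per process, and the resulting count matches what the endpoint will actually see far more closely than a whitespace split.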