v1
Browse files
app.py
CHANGED
@@ -44,7 +44,6 @@ def threading_function(inputs, streamer, device, model, tokenizer, temperature,
|
|
44 |
generation_kwargs.update({'max_new_tokens': new_max_token})
|
45 |
generation_kwargs.update({'top_p': top_p})
|
46 |
generation_kwargs.update({'temperature': temperature})
|
47 |
-
generation_kwargs.update({'use_cache': True})
|
48 |
return model.generate(**generation_kwargs)
|
49 |
|
50 |
@spaces.GPU
|
|
|
44 |
generation_kwargs.update({'max_new_tokens': new_max_token})
|
45 |
generation_kwargs.update({'top_p': top_p})
|
46 |
generation_kwargs.update({'temperature': temperature})
|
|
|
47 |
return model.generate(**generation_kwargs)
|
48 |
|
49 |
@spaces.GPU
|