Spaces: Running on Zero
Update app.py
Browse files
app.py
CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
 import torch
 import pandas as pd
 import plotly.graph_objects as go
+import spaces
 from plotly.subplots import make_subplots
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import time
@@ -73,7 +74,8 @@ def truncate_context(input_ids, max_length=2048):
     if len(input_ids[0]) > max_length:
         return input_ids[:, -max_length:]
     return input_ids
-
+
+@spaces.GPU
 def generate_response(message, chat_history, epsilon):
     global data, stop_generation
     data = pd.DataFrame(columns=["Time taken (in ms)", "Early exit depth", "Token"])
|